aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/tracepoints.txt13
-rw-r--r--arch/x86/kernel/ftrace.c6
-rw-r--r--arch/x86/kernel/kprobes.c17
-rw-r--r--fs/debugfs/inode.c16
-rw-r--r--include/linux/debugfs.h8
-rw-r--r--include/linux/ftrace.h15
-rw-r--r--include/linux/ring_buffer.h11
-rw-r--r--include/linux/sched.h2
-rw-r--r--init/main.c3
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/blktrace.c473
-rw-r--r--kernel/trace/ftrace.c68
-rw-r--r--kernel/trace/ring_buffer.c117
-rw-r--r--kernel/trace/trace.c50
-rw-r--r--kernel/trace/trace.h77
-rw-r--r--kernel/trace/trace_clock.c1
-rw-r--r--kernel/trace/trace_events.c203
-rw-r--r--kernel/trace/trace_events_filter.c427
-rw-r--r--kernel/trace/trace_events_stage_2.h45
-rw-r--r--kernel/trace/trace_events_stage_3.h9
-rw-r--r--kernel/trace/trace_functions_graph.c8
-rw-r--r--kernel/trace/trace_nop.c1
-rw-r--r--kernel/trace/trace_output.c19
-rw-r--r--kernel/trace/trace_output.h33
-rw-r--r--kernel/trace/trace_stat.c47
-rw-r--r--kernel/trace/trace_workqueue.c12
-rw-r--r--samples/tracepoints/tracepoint-sample.c24
28 files changed, 1330 insertions, 382 deletions
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 4ff43c6de299..c0e1ceed75a4 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -103,13 +103,14 @@ used to export the defined tracepoints.
103 103
104* Probe / tracepoint example 104* Probe / tracepoint example
105 105
106See the example provided in samples/tracepoints/src 106See the example provided in samples/tracepoints
107 107
108Compile them with your kernel. 108Compile them with your kernel. They are built during 'make' (not
109'make modules') when CONFIG_SAMPLE_TRACEPOINTS=m.
109 110
110Run, as root : 111Run, as root :
111modprobe tracepoint-example (insmod order is not important) 112modprobe tracepoint-sample (insmod order is not important)
112modprobe tracepoint-probe-example 113modprobe tracepoint-probe-sample
113cat /proc/tracepoint-example (returns an expected error) 114cat /proc/tracepoint-sample (returns an expected error)
114rmmod tracepoint-example tracepoint-probe-example 115rmmod tracepoint-sample tracepoint-probe-sample
115dmesg 116dmesg
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 57b33edb7ce3..61df77532120 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -410,7 +410,6 @@ int ftrace_disable_ftrace_graph_caller(void)
410void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) 410void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
411{ 411{
412 unsigned long old; 412 unsigned long old;
413 unsigned long long calltime;
414 int faulted; 413 int faulted;
415 struct ftrace_graph_ent trace; 414 struct ftrace_graph_ent trace;
416 unsigned long return_hooker = (unsigned long) 415 unsigned long return_hooker = (unsigned long)
@@ -453,10 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
453 return; 452 return;
454 } 453 }
455 454
456 calltime = trace_clock_local(); 455 if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
457
458 if (ftrace_push_return_trace(old, calltime,
459 self_addr, &trace.depth) == -EBUSY) {
460 *parent = old; 456 *parent = old;
461 return; 457 return;
462 } 458 }
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 4558dd3918cf..759095d53a06 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -638,13 +638,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
638#else 638#else
639 " pushf\n" 639 " pushf\n"
640 /* 640 /*
641 * Skip cs, ip, orig_ax. 641 * Skip cs, ip, orig_ax and gs.
642 * trampoline_handler() will plug in these values 642 * trampoline_handler() will plug in these values
643 */ 643 */
644 " subl $12, %esp\n" 644 " subl $16, %esp\n"
645 " pushl %fs\n" 645 " pushl %fs\n"
646 " pushl %ds\n"
647 " pushl %es\n" 646 " pushl %es\n"
647 " pushl %ds\n"
648 " pushl %eax\n" 648 " pushl %eax\n"
649 " pushl %ebp\n" 649 " pushl %ebp\n"
650 " pushl %edi\n" 650 " pushl %edi\n"
@@ -655,10 +655,10 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
655 " movl %esp, %eax\n" 655 " movl %esp, %eax\n"
656 " call trampoline_handler\n" 656 " call trampoline_handler\n"
657 /* Move flags to cs */ 657 /* Move flags to cs */
658 " movl 52(%esp), %edx\n" 658 " movl 56(%esp), %edx\n"
659 " movl %edx, 48(%esp)\n" 659 " movl %edx, 52(%esp)\n"
660 /* Replace saved flags with true return address. */ 660 /* Replace saved flags with true return address. */
661 " movl %eax, 52(%esp)\n" 661 " movl %eax, 56(%esp)\n"
662 " popl %ebx\n" 662 " popl %ebx\n"
663 " popl %ecx\n" 663 " popl %ecx\n"
664 " popl %edx\n" 664 " popl %edx\n"
@@ -666,8 +666,8 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
666 " popl %edi\n" 666 " popl %edi\n"
667 " popl %ebp\n" 667 " popl %ebp\n"
668 " popl %eax\n" 668 " popl %eax\n"
669 /* Skip ip, orig_ax, es, ds, fs */ 669 /* Skip ds, es, fs, gs, orig_ax and ip */
670 " addl $20, %esp\n" 670 " addl $24, %esp\n"
671 " popf\n" 671 " popf\n"
672#endif 672#endif
673 " ret\n"); 673 " ret\n");
@@ -691,6 +691,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
691 regs->cs = __KERNEL_CS; 691 regs->cs = __KERNEL_CS;
692#else 692#else
693 regs->cs = __KERNEL_CS | get_kernel_rpl(); 693 regs->cs = __KERNEL_CS | get_kernel_rpl();
694 regs->gs = 0;
694#endif 695#endif
695 regs->ip = trampoline_address; 696 regs->ip = trampoline_address;
696 regs->orig_ax = ~0UL; 697 regs->orig_ax = ~0UL;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 81ae9ea3c6e1..0662ba6de85a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -30,6 +30,7 @@
30 30
31static struct vfsmount *debugfs_mount; 31static struct vfsmount *debugfs_mount;
32static int debugfs_mount_count; 32static int debugfs_mount_count;
33static bool debugfs_registered;
33 34
34static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) 35static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev)
35{ 36{
@@ -496,6 +497,16 @@ exit:
496} 497}
497EXPORT_SYMBOL_GPL(debugfs_rename); 498EXPORT_SYMBOL_GPL(debugfs_rename);
498 499
500/**
501 * debugfs_initialized - Tells whether debugfs has been registered
502 */
503bool debugfs_initialized(void)
504{
505 return debugfs_registered;
506}
507EXPORT_SYMBOL_GPL(debugfs_initialized);
508
509
499static struct kobject *debug_kobj; 510static struct kobject *debug_kobj;
500 511
501static int __init debugfs_init(void) 512static int __init debugfs_init(void)
@@ -509,11 +520,16 @@ static int __init debugfs_init(void)
509 retval = register_filesystem(&debug_fs_type); 520 retval = register_filesystem(&debug_fs_type);
510 if (retval) 521 if (retval)
511 kobject_put(debug_kobj); 522 kobject_put(debug_kobj);
523 else
524 debugfs_registered = true;
525
512 return retval; 526 return retval;
513} 527}
514 528
515static void __exit debugfs_exit(void) 529static void __exit debugfs_exit(void)
516{ 530{
531 debugfs_registered = false;
532
517 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 533 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
518 unregister_filesystem(&debug_fs_type); 534 unregister_filesystem(&debug_fs_type);
519 kobject_put(debug_kobj); 535 kobject_put(debug_kobj);
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index af0e01d4c663..eb5c2ba2f81a 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -71,6 +71,9 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode,
71struct dentry *debugfs_create_blob(const char *name, mode_t mode, 71struct dentry *debugfs_create_blob(const char *name, mode_t mode,
72 struct dentry *parent, 72 struct dentry *parent,
73 struct debugfs_blob_wrapper *blob); 73 struct debugfs_blob_wrapper *blob);
74
75bool debugfs_initialized(void);
76
74#else 77#else
75 78
76#include <linux/err.h> 79#include <linux/err.h>
@@ -183,6 +186,11 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode,
183 return ERR_PTR(-ENODEV); 186 return ERR_PTR(-ENODEV);
184} 187}
185 188
189static inline bool debugfs_initialized(void)
190{
191 return false;
192}
193
186#endif 194#endif
187 195
188#endif 196#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index db3fed630db3..015a3d22cf74 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -145,9 +145,15 @@ enum {
145}; 145};
146 146
147struct dyn_ftrace { 147struct dyn_ftrace {
148 unsigned long ip; /* address of mcount call-site */ 148 union {
149 unsigned long flags; 149 unsigned long ip; /* address of mcount call-site */
150 struct dyn_arch_ftrace arch; 150 struct dyn_ftrace *freelist;
151 };
152 union {
153 unsigned long flags;
154 struct dyn_ftrace *newlist;
155 };
156 struct dyn_arch_ftrace arch;
151}; 157};
152 158
153int ftrace_force_update(void); 159int ftrace_force_update(void);
@@ -369,8 +375,7 @@ struct ftrace_ret_stack {
369extern void return_to_handler(void); 375extern void return_to_handler(void);
370 376
371extern int 377extern int
372ftrace_push_return_trace(unsigned long ret, unsigned long long time, 378ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
373 unsigned long func, int *depth);
374extern void 379extern void
375ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); 380ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
376 381
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 9e6052bd1a1c..e1b7b2173885 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -18,10 +18,13 @@ struct ring_buffer_event {
18/** 18/**
19 * enum ring_buffer_type - internal ring buffer types 19 * enum ring_buffer_type - internal ring buffer types
20 * 20 *
21 * @RINGBUF_TYPE_PADDING: Left over page padding 21 * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event
22 * array is ignored 22 * If time_delta is 0:
23 * size is variable depending on how much 23 * array is ignored
24 * size is variable depending on how much
24 * padding is needed 25 * padding is needed
26 * If time_delta is non zero:
27 * everything else same as RINGBUF_TYPE_DATA
25 * 28 *
26 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta 29 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta
27 * array[0] = time delta (28 .. 59) 30 * array[0] = time delta (28 .. 59)
@@ -65,6 +68,8 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
65 return event->time_delta; 68 return event->time_delta;
66} 69}
67 70
71void ring_buffer_event_discard(struct ring_buffer_event *event);
72
68/* 73/*
69 * size is in bytes for each per CPU buffer. 74 * size is in bytes for each per CPU buffer.
70 */ 75 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89cd308cc7a5..471e36d30123 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1409,6 +1409,8 @@ struct task_struct {
1409 int curr_ret_stack; 1409 int curr_ret_stack;
1410 /* Stack of return addresses for return function tracing */ 1410 /* Stack of return addresses for return function tracing */
1411 struct ftrace_ret_stack *ret_stack; 1411 struct ftrace_ret_stack *ret_stack;
1412 /* time stamp for last schedule */
1413 unsigned long long ftrace_timestamp;
1412 /* 1414 /*
1413 * Number of functions that haven't been traced 1415 * Number of functions that haven't been traced
1414 * because of depth overrun. 1416 * because of depth overrun.
diff --git a/init/main.c b/init/main.c
index 20d784ab5ef8..b0097d2b63ae 100644
--- a/init/main.c
+++ b/init/main.c
@@ -772,6 +772,7 @@ static void __init do_basic_setup(void)
772{ 772{
773 rcu_init_sched(); /* needed by module_init stage. */ 773 rcu_init_sched(); /* needed by module_init stage. */
774 init_workqueues(); 774 init_workqueues();
775 cpuset_init_smp();
775 usermodehelper_init(); 776 usermodehelper_init();
776 driver_init(); 777 driver_init();
777 init_irq_proc(); 778 init_irq_proc();
@@ -865,8 +866,6 @@ static int __init kernel_init(void * unused)
865 smp_init(); 866 smp_init();
866 sched_init_smp(); 867 sched_init_smp();
867 868
868 cpuset_init_smp();
869
870 do_basic_setup(); 869 do_basic_setup();
871 870
872 /* 871 /*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b0a46f889659..8a4d72931042 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -63,7 +63,11 @@ config TRACING
63# 63#
64config TRACING_SUPPORT 64config TRACING_SUPPORT
65 bool 65 bool
66 depends on TRACE_IRQFLAGS_SUPPORT 66 # PPC32 has no irqflags tracing support, but it can use most of the
67 # tracers anyway, they were tested to build and work. Note that new
68 # exceptions to this list aren't welcomed, better implement the
69 # irqflags tracing for your architecture.
70 depends on TRACE_IRQFLAGS_SUPPORT || PPC32
67 depends on STACKTRACE_SUPPORT 71 depends on STACKTRACE_SUPPORT
68 default y 72 default y
69 73
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 0e45c206c2f9..2630f5121ec1 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -45,5 +45,6 @@ obj-$(CONFIG_EVENT_TRACER) += events.o
45obj-$(CONFIG_EVENT_TRACER) += trace_export.o 45obj-$(CONFIG_EVENT_TRACER) += trace_export.o
46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
48obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
48 49
49libftrace-y := ftrace.o 50libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b171778e3863..947c5b3f90c4 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
30static unsigned int blktrace_seq __read_mostly = 1; 30static unsigned int blktrace_seq __read_mostly = 1;
31 31
32static struct trace_array *blk_tr; 32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled; 33static bool blk_tracer_enabled __read_mostly;
34 34
35/* Select an alternative, minimalistic output than the original one */ 35/* Select an alternative, minimalistic output than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1 36#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
47}; 47};
48 48
49/* Global reference count of probes */ 49/* Global reference count of probes */
50static DEFINE_MUTEX(blk_probe_mutex);
51static atomic_t blk_probes_ref = ATOMIC_INIT(0); 50static atomic_t blk_probes_ref = ATOMIC_INIT(0);
52 51
53static int blk_register_tracepoints(void); 52static void blk_register_tracepoints(void);
54static void blk_unregister_tracepoints(void); 53static void blk_unregister_tracepoints(void);
55 54
56/* 55/*
@@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
60 const void *data, size_t len) 59 const void *data, size_t len)
61{ 60{
62 struct blk_io_trace *t; 61 struct blk_io_trace *t;
62 struct ring_buffer_event *event = NULL;
63 int pc = 0;
64 int cpu = smp_processor_id();
65 bool blk_tracer = blk_tracer_enabled;
66
67 if (blk_tracer) {
68 pc = preempt_count();
69 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
70 sizeof(*t) + len,
71 0, pc);
72 if (!event)
73 return;
74 t = ring_buffer_event_data(event);
75 goto record_it;
76 }
63 77
64 if (!bt->rchan) 78 if (!bt->rchan)
65 return; 79 return;
66 80
67 t = relay_reserve(bt->rchan, sizeof(*t) + len); 81 t = relay_reserve(bt->rchan, sizeof(*t) + len);
68 if (t) { 82 if (t) {
69 const int cpu = smp_processor_id();
70
71 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 83 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
72 t->time = ktime_to_ns(ktime_get()); 84 t->time = ktime_to_ns(ktime_get());
85record_it:
73 t->device = bt->dev; 86 t->device = bt->dev;
74 t->action = action; 87 t->action = action;
75 t->pid = pid; 88 t->pid = pid;
76 t->cpu = cpu; 89 t->cpu = cpu;
77 t->pdu_len = len; 90 t->pdu_len = len;
78 memcpy((void *) t + sizeof(*t), data, len); 91 memcpy((void *) t + sizeof(*t), data, len);
92
93 if (blk_tracer)
94 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
79 } 95 }
80} 96}
81 97
@@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
111 unsigned long flags; 127 unsigned long flags;
112 char *buf; 128 char *buf;
113 129
114 if (blk_tr) { 130 if (unlikely(bt->trace_state != Blktrace_running &&
115 va_start(args, fmt); 131 !blk_tracer_enabled))
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return; 132 return;
123 133
124 local_irq_save(flags); 134 local_irq_save(flags);
@@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
148/* 158/*
149 * Data direction bit lookup 159 * Data direction bit lookup
150 */ 160 */
151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), 161static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) }; 162 BLK_TC_ACT(BLK_TC_WRITE) };
153 163
154/* The ilog2() calls fall out because they're constant */ 164/* The ilog2() calls fall out because they're constant */
155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ 165#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
@@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
169 unsigned long *sequence; 179 unsigned long *sequence;
170 pid_t pid; 180 pid_t pid;
171 int cpu, pc = 0; 181 int cpu, pc = 0;
182 bool blk_tracer = blk_tracer_enabled;
172 183
173 if (unlikely(bt->trace_state != Blktrace_running || 184 if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
174 !blk_tracer_enabled))
175 return; 185 return;
176 186
177 what |= ddir_act[rw & WRITE]; 187 what |= ddir_act[rw & WRITE];
@@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
186 return; 196 return;
187 cpu = raw_smp_processor_id(); 197 cpu = raw_smp_processor_id();
188 198
189 if (blk_tr) { 199 if (blk_tracer) {
190 tracing_record_cmdline(current); 200 tracing_record_cmdline(current);
191 201
192 pc = preempt_count(); 202 pc = preempt_count();
@@ -236,7 +246,7 @@ record_it:
236 if (pdu_len) 246 if (pdu_len)
237 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 247 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
238 248
239 if (blk_tr) { 249 if (blk_tracer) {
240 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 250 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
241 return; 251 return;
242 } 252 }
@@ -248,7 +258,7 @@ record_it:
248static struct dentry *blk_tree_root; 258static struct dentry *blk_tree_root;
249static DEFINE_MUTEX(blk_tree_mutex); 259static DEFINE_MUTEX(blk_tree_mutex);
250 260
251static void blk_trace_cleanup(struct blk_trace *bt) 261static void blk_trace_free(struct blk_trace *bt)
252{ 262{
253 debugfs_remove(bt->msg_file); 263 debugfs_remove(bt->msg_file);
254 debugfs_remove(bt->dropped_file); 264 debugfs_remove(bt->dropped_file);
@@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt)
256 free_percpu(bt->sequence); 266 free_percpu(bt->sequence);
257 free_percpu(bt->msg_data); 267 free_percpu(bt->msg_data);
258 kfree(bt); 268 kfree(bt);
259 mutex_lock(&blk_probe_mutex); 269}
270
271static void blk_trace_cleanup(struct blk_trace *bt)
272{
273 blk_trace_free(bt);
260 if (atomic_dec_and_test(&blk_probes_ref)) 274 if (atomic_dec_and_test(&blk_probes_ref))
261 blk_unregister_tracepoints(); 275 blk_unregister_tracepoints();
262 mutex_unlock(&blk_probe_mutex);
263} 276}
264 277
265int blk_trace_remove(struct request_queue *q) 278int blk_trace_remove(struct request_queue *q)
@@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q)
270 if (!bt) 283 if (!bt)
271 return -EINVAL; 284 return -EINVAL;
272 285
273 if (bt->trace_state == Blktrace_setup || 286 if (bt->trace_state != Blktrace_running)
274 bt->trace_state == Blktrace_stopped)
275 blk_trace_cleanup(bt); 287 blk_trace_cleanup(bt);
276 288
277 return 0; 289 return 0;
@@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
414 if (buts->name[i] == '/') 426 if (buts->name[i] == '/')
415 buts->name[i] = '_'; 427 buts->name[i] = '_';
416 428
417 ret = -ENOMEM;
418 bt = kzalloc(sizeof(*bt), GFP_KERNEL); 429 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
419 if (!bt) 430 if (!bt)
420 goto err; 431 return -ENOMEM;
421 432
433 ret = -ENOMEM;
422 bt->sequence = alloc_percpu(unsigned long); 434 bt->sequence = alloc_percpu(unsigned long);
423 if (!bt->sequence) 435 if (!bt->sequence)
424 goto err; 436 goto err;
@@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
429 441
430 ret = -ENOENT; 442 ret = -ENOENT;
431 443
444 mutex_lock(&blk_tree_mutex);
432 if (!blk_tree_root) { 445 if (!blk_tree_root) {
433 blk_tree_root = debugfs_create_dir("block", NULL); 446 blk_tree_root = debugfs_create_dir("block", NULL);
434 if (!blk_tree_root) 447 if (!blk_tree_root) {
435 return -ENOMEM; 448 mutex_unlock(&blk_tree_mutex);
449 goto err;
450 }
436 } 451 }
452 mutex_unlock(&blk_tree_mutex);
437 453
438 dir = debugfs_create_dir(buts->name, blk_tree_root); 454 dir = debugfs_create_dir(buts->name, blk_tree_root);
439 455
@@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
471 bt->pid = buts->pid; 487 bt->pid = buts->pid;
472 bt->trace_state = Blktrace_setup; 488 bt->trace_state = Blktrace_setup;
473 489
474 mutex_lock(&blk_probe_mutex);
475 if (atomic_add_return(1, &blk_probes_ref) == 1) {
476 ret = blk_register_tracepoints();
477 if (ret)
478 goto probe_err;
479 }
480 mutex_unlock(&blk_probe_mutex);
481
482 ret = -EBUSY; 490 ret = -EBUSY;
483 old_bt = xchg(&q->blk_trace, bt); 491 old_bt = xchg(&q->blk_trace, bt);
484 if (old_bt) { 492 if (old_bt) {
@@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
486 goto err; 494 goto err;
487 } 495 }
488 496
497 if (atomic_inc_return(&blk_probes_ref) == 1)
498 blk_register_tracepoints();
499
489 return 0; 500 return 0;
490probe_err:
491 atomic_dec(&blk_probes_ref);
492 mutex_unlock(&blk_probe_mutex);
493err: 501err:
494 if (bt) { 502 blk_trace_free(bt);
495 if (bt->msg_file)
496 debugfs_remove(bt->msg_file);
497 if (bt->dropped_file)
498 debugfs_remove(bt->dropped_file);
499 free_percpu(bt->sequence);
500 free_percpu(bt->msg_data);
501 if (bt->rchan)
502 relay_close(bt->rchan);
503 kfree(bt);
504 }
505 return ret; 503 return ret;
506} 504}
507 505
@@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q,
863} 861}
864EXPORT_SYMBOL_GPL(blk_add_driver_data); 862EXPORT_SYMBOL_GPL(blk_add_driver_data);
865 863
866static int blk_register_tracepoints(void) 864static void blk_register_tracepoints(void)
867{ 865{
868 int ret; 866 int ret;
869 867
@@ -901,7 +899,6 @@ static int blk_register_tracepoints(void)
901 WARN_ON(ret); 899 WARN_ON(ret);
902 ret = register_trace_block_remap(blk_add_trace_remap); 900 ret = register_trace_block_remap(blk_add_trace_remap);
903 WARN_ON(ret); 901 WARN_ON(ret);
904 return 0;
905} 902}
906 903
907static void blk_unregister_tracepoints(void) 904static void blk_unregister_tracepoints(void)
@@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void)
934static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) 931static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
935{ 932{
936 int i = 0; 933 int i = 0;
934 int tc = t->action >> BLK_TC_SHIFT;
935
936 if (t->action == BLK_TN_MESSAGE) {
937 rwbs[i++] = 'N';
938 goto out;
939 }
937 940
938 if (t->action & BLK_TC_DISCARD) 941 if (tc & BLK_TC_DISCARD)
939 rwbs[i++] = 'D'; 942 rwbs[i++] = 'D';
940 else if (t->action & BLK_TC_WRITE) 943 else if (tc & BLK_TC_WRITE)
941 rwbs[i++] = 'W'; 944 rwbs[i++] = 'W';
942 else if (t->bytes) 945 else if (t->bytes)
943 rwbs[i++] = 'R'; 946 rwbs[i++] = 'R';
944 else 947 else
945 rwbs[i++] = 'N'; 948 rwbs[i++] = 'N';
946 949
947 if (t->action & BLK_TC_AHEAD) 950 if (tc & BLK_TC_AHEAD)
948 rwbs[i++] = 'A'; 951 rwbs[i++] = 'A';
949 if (t->action & BLK_TC_BARRIER) 952 if (tc & BLK_TC_BARRIER)
950 rwbs[i++] = 'B'; 953 rwbs[i++] = 'B';
951 if (t->action & BLK_TC_SYNC) 954 if (tc & BLK_TC_SYNC)
952 rwbs[i++] = 'S'; 955 rwbs[i++] = 'S';
953 if (t->action & BLK_TC_META) 956 if (tc & BLK_TC_META)
954 rwbs[i++] = 'M'; 957 rwbs[i++] = 'M';
955 958out:
956 rwbs[i] = '\0'; 959 rwbs[i] = '\0';
957} 960}
958 961
@@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent)
979 982
980static inline __u16 t_error(const struct trace_entry *ent) 983static inline __u16 t_error(const struct trace_entry *ent)
981{ 984{
982 return te_blk_io_trace(ent)->sector; 985 return te_blk_io_trace(ent)->error;
983} 986}
984 987
985static __u64 get_pdu_int(const struct trace_entry *ent) 988static __u64 get_pdu_int(const struct trace_entry *ent)
@@ -999,29 +1002,31 @@ static void get_pdu_remap(const struct trace_entry *ent,
999 r->sector = be64_to_cpu(sector); 1002 r->sector = be64_to_cpu(sector);
1000} 1003}
1001 1004
1002static int blk_log_action_iter(struct trace_iterator *iter, const char *act) 1005typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
1006
1007static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1003{ 1008{
1004 char rwbs[6]; 1009 char rwbs[6];
1005 unsigned long long ts = ns2usecs(iter->ts); 1010 unsigned long long ts = iter->ts;
1006 unsigned long usec_rem = do_div(ts, USEC_PER_SEC); 1011 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
1007 unsigned secs = (unsigned long)ts; 1012 unsigned secs = (unsigned long)ts;
1008 const struct trace_entry *ent = iter->ent; 1013 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1009 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1010 1014
1011 fill_rwbs(rwbs, t); 1015 fill_rwbs(rwbs, t);
1012 1016
1013 return trace_seq_printf(&iter->seq, 1017 return trace_seq_printf(&iter->seq,
1014 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", 1018 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
1015 MAJOR(t->device), MINOR(t->device), iter->cpu, 1019 MAJOR(t->device), MINOR(t->device), iter->cpu,
1016 secs, usec_rem, ent->pid, act, rwbs); 1020 secs, nsec_rem, iter->ent->pid, act, rwbs);
1017} 1021}
1018 1022
1019static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, 1023static int blk_log_action(struct trace_iterator *iter, const char *act)
1020 const char *act)
1021{ 1024{
1022 char rwbs[6]; 1025 char rwbs[6];
1026 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1027
1023 fill_rwbs(rwbs, t); 1028 fill_rwbs(rwbs, t);
1024 return trace_seq_printf(s, "%3d,%-3d %2s %3s ", 1029 return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
1025 MAJOR(t->device), MINOR(t->device), act, rwbs); 1030 MAJOR(t->device), MINOR(t->device), act, rwbs);
1026} 1031}
1027 1032
@@ -1085,6 +1090,17 @@ static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1085 get_pdu_int(ent), cmd); 1090 get_pdu_int(ent), cmd);
1086} 1091}
1087 1092
1093static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
1094{
1095 int ret;
1096 const struct blk_io_trace *t = te_blk_io_trace(ent);
1097
1098 ret = trace_seq_putmem(s, t + 1, t->pdu_len);
1099 if (ret)
1100 return trace_seq_putc(s, '\n');
1101 return ret;
1102}
1103
1088/* 1104/*
1089 * struct tracer operations 1105 * struct tracer operations
1090 */ 1106 */
@@ -1099,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m)
1099 1115
1100static void blk_tracer_start(struct trace_array *tr) 1116static void blk_tracer_start(struct trace_array *tr)
1101{ 1117{
1102 mutex_lock(&blk_probe_mutex); 1118 blk_tracer_enabled = true;
1103 if (atomic_add_return(1, &blk_probes_ref) == 1)
1104 if (blk_register_tracepoints())
1105 atomic_dec(&blk_probes_ref);
1106 mutex_unlock(&blk_probe_mutex);
1107 trace_flags &= ~TRACE_ITER_CONTEXT_INFO; 1119 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1108} 1120}
1109 1121
@@ -1111,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr)
1111{ 1123{
1112 blk_tr = tr; 1124 blk_tr = tr;
1113 blk_tracer_start(tr); 1125 blk_tracer_start(tr);
1114 mutex_lock(&blk_probe_mutex);
1115 blk_tracer_enabled++;
1116 mutex_unlock(&blk_probe_mutex);
1117 return 0; 1126 return 0;
1118} 1127}
1119 1128
1120static void blk_tracer_stop(struct trace_array *tr) 1129static void blk_tracer_stop(struct trace_array *tr)
1121{ 1130{
1131 blk_tracer_enabled = false;
1122 trace_flags |= TRACE_ITER_CONTEXT_INFO; 1132 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1123 mutex_lock(&blk_probe_mutex);
1124 if (atomic_dec_and_test(&blk_probes_ref))
1125 blk_unregister_tracepoints();
1126 mutex_unlock(&blk_probe_mutex);
1127} 1133}
1128 1134
1129static void blk_tracer_reset(struct trace_array *tr) 1135static void blk_tracer_reset(struct trace_array *tr)
1130{ 1136{
1131 if (!atomic_read(&blk_probes_ref))
1132 return;
1133
1134 mutex_lock(&blk_probe_mutex);
1135 blk_tracer_enabled--;
1136 WARN_ON(blk_tracer_enabled < 0);
1137 mutex_unlock(&blk_probe_mutex);
1138
1139 blk_tracer_stop(tr); 1137 blk_tracer_stop(tr);
1140} 1138}
1141 1139
1142static struct { 1140static const struct {
1143 const char *act[2]; 1141 const char *act[2];
1144 int (*print)(struct trace_seq *s, const struct trace_entry *ent); 1142 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1145} what2act[] __read_mostly = { 1143} what2act[] = {
1146 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, 1144 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1147 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, 1145 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1148 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, 1146 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
@@ -1160,29 +1158,48 @@ static struct {
1160 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, 1158 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1161}; 1159};
1162 1160
1163static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1161static enum print_line_t print_one_line(struct trace_iterator *iter,
1164 int flags) 1162 bool classic)
1165{ 1163{
1166 struct trace_seq *s = &iter->seq; 1164 struct trace_seq *s = &iter->seq;
1167 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; 1165 const struct blk_io_trace *t;
1168 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); 1166 u16 what;
1169 int ret; 1167 int ret;
1168 bool long_act;
1169 blk_log_action_t *log_action;
1170 1170
1171 if (!trace_print_context(iter)) 1171 t = te_blk_io_trace(iter->ent);
1172 return TRACE_TYPE_PARTIAL_LINE; 1172 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1173 long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1174 log_action = classic ? &blk_log_action_classic : &blk_log_action;
1173 1175
1174 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) 1176 if (t->action == BLK_TN_MESSAGE) {
1177 ret = log_action(iter, long_act ? "message" : "m");
1178 if (ret)
1179 ret = blk_log_msg(s, iter->ent);
1180 goto out;
1181 }
1182
1183 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
1175 ret = trace_seq_printf(s, "Bad pc action %x\n", what); 1184 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1176 else { 1185 else {
1177 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); 1186 ret = log_action(iter, what2act[what].act[long_act]);
1178 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1179 if (ret) 1187 if (ret)
1180 ret = what2act[what].print(s, iter->ent); 1188 ret = what2act[what].print(s, iter->ent);
1181 } 1189 }
1182 1190out:
1183 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1191 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1184} 1192}
1185 1193
1194static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1195 int flags)
1196{
1197 if (!trace_print_context(iter))
1198 return TRACE_TYPE_PARTIAL_LINE;
1199
1200 return print_one_line(iter, false);
1201}
1202
1186static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) 1203static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1187{ 1204{
1188 struct trace_seq *s = &iter->seq; 1205 struct trace_seq *s = &iter->seq;
@@ -1190,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1190 const int offset = offsetof(struct blk_io_trace, sector); 1207 const int offset = offsetof(struct blk_io_trace, sector);
1191 struct blk_io_trace old = { 1208 struct blk_io_trace old = {
1192 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, 1209 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1193 .time = ns2usecs(iter->ts), 1210 .time = iter->ts,
1194 }; 1211 };
1195 1212
1196 if (!trace_seq_putmem(s, &old, offset)) 1213 if (!trace_seq_putmem(s, &old, offset))
@@ -1208,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1208 1225
1209static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) 1226static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1210{ 1227{
1211 const struct blk_io_trace *t;
1212 u16 what;
1213 int ret;
1214
1215 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) 1228 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1216 return TRACE_TYPE_UNHANDLED; 1229 return TRACE_TYPE_UNHANDLED;
1217 1230
1218 t = (const struct blk_io_trace *)iter->ent; 1231 return print_one_line(iter, true);
1219 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1220
1221 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1222 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1223 else {
1224 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1225 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1226 if (ret)
1227 ret = what2act[what].print(&iter->seq, iter->ent);
1228 }
1229
1230 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1231} 1232}
1232 1233
1233static struct tracer blk_tracer __read_mostly = { 1234static struct tracer blk_tracer __read_mostly = {
@@ -1273,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
1273 if (bt == NULL) 1274 if (bt == NULL)
1274 return -EINVAL; 1275 return -EINVAL;
1275 1276
1276 kfree(bt); 1277 if (atomic_dec_and_test(&blk_probes_ref))
1278 blk_unregister_tracepoints();
1279
1280 blk_trace_free(bt);
1277 return 0; 1281 return 0;
1278} 1282}
1279 1283
@@ -1283,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q)
1283static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) 1287static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1284{ 1288{
1285 struct blk_trace *old_bt, *bt = NULL; 1289 struct blk_trace *old_bt, *bt = NULL;
1286 int ret; 1290 int ret = -ENOMEM;
1287 1291
1288 ret = -ENOMEM;
1289 bt = kzalloc(sizeof(*bt), GFP_KERNEL); 1292 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1290 if (!bt) 1293 if (!bt)
1291 goto err; 1294 return -ENOMEM;
1295
1296 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
1297 if (!bt->msg_data)
1298 goto free_bt;
1292 1299
1293 bt->dev = dev; 1300 bt->dev = dev;
1294 bt->act_mask = (u16)-1; 1301 bt->act_mask = (u16)-1;
1295 bt->end_lba = -1ULL; 1302 bt->end_lba = -1ULL;
1296 bt->trace_state = Blktrace_running;
1297 1303
1298 old_bt = xchg(&q->blk_trace, bt); 1304 old_bt = xchg(&q->blk_trace, bt);
1299 if (old_bt != NULL) { 1305 if (old_bt != NULL) {
1300 (void)xchg(&q->blk_trace, old_bt); 1306 (void)xchg(&q->blk_trace, old_bt);
1301 kfree(bt);
1302 ret = -EBUSY; 1307 ret = -EBUSY;
1308 goto free_bt;
1303 } 1309 }
1310
1311 if (atomic_inc_return(&blk_probes_ref) == 1)
1312 blk_register_tracepoints();
1304 return 0; 1313 return 0;
1305err: 1314
1315free_bt:
1316 blk_trace_free(bt);
1306 return ret; 1317 return ret;
1307} 1318}
1308 1319
@@ -1310,72 +1321,6 @@ err:
1310 * sysfs interface to enable and configure tracing 1321 * sysfs interface to enable and configure tracing
1311 */ 1322 */
1312 1323
1313static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1314 struct device_attribute *attr,
1315 char *buf)
1316{
1317 struct hd_struct *p = dev_to_part(dev);
1318 struct block_device *bdev;
1319 ssize_t ret = -ENXIO;
1320
1321 lock_kernel();
1322 bdev = bdget(part_devt(p));
1323 if (bdev != NULL) {
1324 struct request_queue *q = bdev_get_queue(bdev);
1325
1326 if (q != NULL) {
1327 mutex_lock(&bdev->bd_mutex);
1328 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1329 mutex_unlock(&bdev->bd_mutex);
1330 }
1331
1332 bdput(bdev);
1333 }
1334
1335 unlock_kernel();
1336 return ret;
1337}
1338
1339static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1340 struct device_attribute *attr,
1341 const char *buf, size_t count)
1342{
1343 struct block_device *bdev;
1344 struct request_queue *q;
1345 struct hd_struct *p;
1346 int value;
1347 ssize_t ret = -ENXIO;
1348
1349 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1350 goto out;
1351
1352 lock_kernel();
1353 p = dev_to_part(dev);
1354 bdev = bdget(part_devt(p));
1355 if (bdev == NULL)
1356 goto out_unlock_kernel;
1357
1358 q = bdev_get_queue(bdev);
1359 if (q == NULL)
1360 goto out_bdput;
1361
1362 mutex_lock(&bdev->bd_mutex);
1363 if (value)
1364 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1365 else
1366 ret = blk_trace_remove_queue(q);
1367 mutex_unlock(&bdev->bd_mutex);
1368
1369 if (ret == 0)
1370 ret = count;
1371out_bdput:
1372 bdput(bdev);
1373out_unlock_kernel:
1374 unlock_kernel();
1375out:
1376 return ret;
1377}
1378
1379static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1324static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1380 struct device_attribute *attr, 1325 struct device_attribute *attr,
1381 char *buf); 1326 char *buf);
@@ -1387,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1387 sysfs_blk_trace_attr_show, \ 1332 sysfs_blk_trace_attr_show, \
1388 sysfs_blk_trace_attr_store) 1333 sysfs_blk_trace_attr_store)
1389 1334
1390static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, 1335static BLK_TRACE_DEVICE_ATTR(enable);
1391 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1392static BLK_TRACE_DEVICE_ATTR(act_mask); 1336static BLK_TRACE_DEVICE_ATTR(act_mask);
1393static BLK_TRACE_DEVICE_ATTR(pid); 1337static BLK_TRACE_DEVICE_ATTR(pid);
1394static BLK_TRACE_DEVICE_ATTR(start_lba); 1338static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1408,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = {
1408 .attrs = blk_trace_attrs, 1352 .attrs = blk_trace_attrs,
1409}; 1353};
1410 1354
1411static int blk_str2act_mask(const char *str) 1355static const struct {
1356 int mask;
1357 const char *str;
1358} mask_maps[] = {
1359 { BLK_TC_READ, "read" },
1360 { BLK_TC_WRITE, "write" },
1361 { BLK_TC_BARRIER, "barrier" },
1362 { BLK_TC_SYNC, "sync" },
1363 { BLK_TC_QUEUE, "queue" },
1364 { BLK_TC_REQUEUE, "requeue" },
1365 { BLK_TC_ISSUE, "issue" },
1366 { BLK_TC_COMPLETE, "complete" },
1367 { BLK_TC_FS, "fs" },
1368 { BLK_TC_PC, "pc" },
1369 { BLK_TC_AHEAD, "ahead" },
1370 { BLK_TC_META, "meta" },
1371 { BLK_TC_DISCARD, "discard" },
1372 { BLK_TC_DRV_DATA, "drv_data" },
1373};
1374
1375static int blk_trace_str2mask(const char *str)
1412{ 1376{
1377 int i;
1413 int mask = 0; 1378 int mask = 0;
1414 char *copy = kstrdup(str, GFP_KERNEL), *s; 1379 char *s, *token;
1415 1380
1416 if (copy == NULL) 1381 s = kstrdup(str, GFP_KERNEL);
1382 if (s == NULL)
1417 return -ENOMEM; 1383 return -ENOMEM;
1418 1384 s = strstrip(s);
1419 s = strstrip(copy);
1420 1385
1421 while (1) { 1386 while (1) {
1422 char *sep = strchr(s, ','); 1387 token = strsep(&s, ",");
1423 1388 if (token == NULL)
1424 if (sep != NULL)
1425 *sep = '\0';
1426
1427 if (strcasecmp(s, "barrier") == 0)
1428 mask |= BLK_TC_BARRIER;
1429 else if (strcasecmp(s, "complete") == 0)
1430 mask |= BLK_TC_COMPLETE;
1431 else if (strcasecmp(s, "fs") == 0)
1432 mask |= BLK_TC_FS;
1433 else if (strcasecmp(s, "issue") == 0)
1434 mask |= BLK_TC_ISSUE;
1435 else if (strcasecmp(s, "pc") == 0)
1436 mask |= BLK_TC_PC;
1437 else if (strcasecmp(s, "queue") == 0)
1438 mask |= BLK_TC_QUEUE;
1439 else if (strcasecmp(s, "read") == 0)
1440 mask |= BLK_TC_READ;
1441 else if (strcasecmp(s, "requeue") == 0)
1442 mask |= BLK_TC_REQUEUE;
1443 else if (strcasecmp(s, "sync") == 0)
1444 mask |= BLK_TC_SYNC;
1445 else if (strcasecmp(s, "write") == 0)
1446 mask |= BLK_TC_WRITE;
1447
1448 if (sep == NULL)
1449 break; 1389 break;
1450 1390
1451 s = sep + 1; 1391 if (*token == '\0')
1392 continue;
1393
1394 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
1395 if (strcasecmp(token, mask_maps[i].str) == 0) {
1396 mask |= mask_maps[i].mask;
1397 break;
1398 }
1399 }
1400 if (i == ARRAY_SIZE(mask_maps)) {
1401 mask = -EINVAL;
1402 break;
1403 }
1452 } 1404 }
1453 kfree(copy); 1405 kfree(s);
1454 1406
1455 return mask; 1407 return mask;
1456} 1408}
1457 1409
1410static ssize_t blk_trace_mask2str(char *buf, int mask)
1411{
1412 int i;
1413 char *p = buf;
1414
1415 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
1416 if (mask & mask_maps[i].mask) {
1417 p += sprintf(p, "%s%s",
1418 (p == buf) ? "" : ",", mask_maps[i].str);
1419 }
1420 }
1421 *p++ = '\n';
1422
1423 return p - buf;
1424}
1425
1426static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
1427{
1428 if (bdev->bd_disk == NULL)
1429 return NULL;
1430
1431 return bdev_get_queue(bdev);
1432}
1433
1458static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1434static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1459 struct device_attribute *attr, 1435 struct device_attribute *attr,
1460 char *buf) 1436 char *buf)
@@ -1469,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1469 if (bdev == NULL) 1445 if (bdev == NULL)
1470 goto out_unlock_kernel; 1446 goto out_unlock_kernel;
1471 1447
1472 q = bdev_get_queue(bdev); 1448 q = blk_trace_get_queue(bdev);
1473 if (q == NULL) 1449 if (q == NULL)
1474 goto out_bdput; 1450 goto out_bdput;
1451
1475 mutex_lock(&bdev->bd_mutex); 1452 mutex_lock(&bdev->bd_mutex);
1453
1454 if (attr == &dev_attr_enable) {
1455 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1456 goto out_unlock_bdev;
1457 }
1458
1476 if (q->blk_trace == NULL) 1459 if (q->blk_trace == NULL)
1477 ret = sprintf(buf, "disabled\n"); 1460 ret = sprintf(buf, "disabled\n");
1478 else if (attr == &dev_attr_act_mask) 1461 else if (attr == &dev_attr_act_mask)
1479 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); 1462 ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1480 else if (attr == &dev_attr_pid) 1463 else if (attr == &dev_attr_pid)
1481 ret = sprintf(buf, "%u\n", q->blk_trace->pid); 1464 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1482 else if (attr == &dev_attr_start_lba) 1465 else if (attr == &dev_attr_start_lba)
1483 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); 1466 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1484 else if (attr == &dev_attr_end_lba) 1467 else if (attr == &dev_attr_end_lba)
1485 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); 1468 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1469
1470out_unlock_bdev:
1486 mutex_unlock(&bdev->bd_mutex); 1471 mutex_unlock(&bdev->bd_mutex);
1487out_bdput: 1472out_bdput:
1488 bdput(bdev); 1473 bdput(bdev);
@@ -1499,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1499 struct request_queue *q; 1484 struct request_queue *q;
1500 struct hd_struct *p; 1485 struct hd_struct *p;
1501 u64 value; 1486 u64 value;
1502 ssize_t ret = -ENXIO; 1487 ssize_t ret = -EINVAL;
1503 1488
1504 if (count == 0) 1489 if (count == 0)
1505 goto out; 1490 goto out;
@@ -1507,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1507 if (attr == &dev_attr_act_mask) { 1492 if (attr == &dev_attr_act_mask) {
1508 if (sscanf(buf, "%llx", &value) != 1) { 1493 if (sscanf(buf, "%llx", &value) != 1) {
1509 /* Assume it is a list of trace category names */ 1494 /* Assume it is a list of trace category names */
1510 value = blk_str2act_mask(buf); 1495 ret = blk_trace_str2mask(buf);
1511 if (value < 0) 1496 if (ret < 0)
1512 goto out; 1497 goto out;
1498 value = ret;
1513 } 1499 }
1514 } else if (sscanf(buf, "%llu", &value) != 1) 1500 } else if (sscanf(buf, "%llu", &value) != 1)
1515 goto out; 1501 goto out;
1516 1502
1503 ret = -ENXIO;
1504
1517 lock_kernel(); 1505 lock_kernel();
1518 p = dev_to_part(dev); 1506 p = dev_to_part(dev);
1519 bdev = bdget(part_devt(p)); 1507 bdev = bdget(part_devt(p));
1520 if (bdev == NULL) 1508 if (bdev == NULL)
1521 goto out_unlock_kernel; 1509 goto out_unlock_kernel;
1522 1510
1523 q = bdev_get_queue(bdev); 1511 q = blk_trace_get_queue(bdev);
1524 if (q == NULL) 1512 if (q == NULL)
1525 goto out_bdput; 1513 goto out_bdput;
1526 1514
1527 mutex_lock(&bdev->bd_mutex); 1515 mutex_lock(&bdev->bd_mutex);
1516
1517 if (attr == &dev_attr_enable) {
1518 if (value)
1519 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1520 else
1521 ret = blk_trace_remove_queue(q);
1522 goto out_unlock_bdev;
1523 }
1524
1528 ret = 0; 1525 ret = 0;
1529 if (q->blk_trace == NULL) 1526 if (q->blk_trace == NULL)
1530 ret = blk_trace_setup_queue(q, bdev->bd_dev); 1527 ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1538,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1538 q->blk_trace->start_lba = value; 1535 q->blk_trace->start_lba = value;
1539 else if (attr == &dev_attr_end_lba) 1536 else if (attr == &dev_attr_end_lba)
1540 q->blk_trace->end_lba = value; 1537 q->blk_trace->end_lba = value;
1541 ret = count;
1542 } 1538 }
1539
1540out_unlock_bdev:
1543 mutex_unlock(&bdev->bd_mutex); 1541 mutex_unlock(&bdev->bd_mutex);
1544out_bdput: 1542out_bdput:
1545 bdput(bdev); 1543 bdput(bdev);
1546out_unlock_kernel: 1544out_unlock_kernel:
1547 unlock_kernel(); 1545 unlock_kernel();
1548out: 1546out:
1549 return ret; 1547 return ret ? ret : count;
1550} 1548}
1549
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7847806eefef..1752a63f37c0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,6 +29,8 @@
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31 31
32#include <trace/sched.h>
33
32#include <asm/ftrace.h> 34#include <asm/ftrace.h>
33 35
34#include "trace.h" 36#include "trace.h"
@@ -339,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec)
339 341
340static void ftrace_free_rec(struct dyn_ftrace *rec) 342static void ftrace_free_rec(struct dyn_ftrace *rec)
341{ 343{
342 rec->ip = (unsigned long)ftrace_free_records; 344 rec->freelist = ftrace_free_records;
343 ftrace_free_records = rec; 345 ftrace_free_records = rec;
344 rec->flags |= FTRACE_FL_FREE; 346 rec->flags |= FTRACE_FL_FREE;
345} 347}
@@ -356,9 +358,14 @@ void ftrace_release(void *start, unsigned long size)
356 358
357 mutex_lock(&ftrace_lock); 359 mutex_lock(&ftrace_lock);
358 do_for_each_ftrace_rec(pg, rec) { 360 do_for_each_ftrace_rec(pg, rec) {
359 if ((rec->ip >= s) && (rec->ip < e) && 361 if ((rec->ip >= s) && (rec->ip < e)) {
360 !(rec->flags & FTRACE_FL_FREE)) 362 /*
363 * rec->ip is changed in ftrace_free_rec()
364 * It should not between s and e if record was freed.
365 */
366 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
361 ftrace_free_rec(rec); 367 ftrace_free_rec(rec);
368 }
362 } while_for_each_ftrace_rec(); 369 } while_for_each_ftrace_rec();
363 mutex_unlock(&ftrace_lock); 370 mutex_unlock(&ftrace_lock);
364} 371}
@@ -377,7 +384,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
377 return NULL; 384 return NULL;
378 } 385 }
379 386
380 ftrace_free_records = (void *)rec->ip; 387 ftrace_free_records = rec->freelist;
381 memset(rec, 0, sizeof(*rec)); 388 memset(rec, 0, sizeof(*rec));
382 return rec; 389 return rec;
383 } 390 }
@@ -409,7 +416,7 @@ ftrace_record_ip(unsigned long ip)
409 return NULL; 416 return NULL;
410 417
411 rec->ip = ip; 418 rec->ip = ip;
412 rec->flags = (unsigned long)ftrace_new_addrs; 419 rec->newlist = ftrace_new_addrs;
413 ftrace_new_addrs = rec; 420 ftrace_new_addrs = rec;
414 421
415 return rec; 422 return rec;
@@ -729,7 +736,7 @@ static int ftrace_update_code(struct module *mod)
729 return -1; 736 return -1;
730 737
731 p = ftrace_new_addrs; 738 p = ftrace_new_addrs;
732 ftrace_new_addrs = (struct dyn_ftrace *)p->flags; 739 ftrace_new_addrs = p->newlist;
733 p->flags = 0L; 740 p->flags = 0L;
734 741
735 /* convert record (i.e, patch mcount-call with NOP) */ 742 /* convert record (i.e, patch mcount-call with NOP) */
@@ -2262,7 +2269,7 @@ ftrace_pid_read(struct file *file, char __user *ubuf,
2262 if (ftrace_pid_trace == ftrace_swapper_pid) 2269 if (ftrace_pid_trace == ftrace_swapper_pid)
2263 r = sprintf(buf, "swapper tasks\n"); 2270 r = sprintf(buf, "swapper tasks\n");
2264 else if (ftrace_pid_trace) 2271 else if (ftrace_pid_trace)
2265 r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); 2272 r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
2266 else 2273 else
2267 r = sprintf(buf, "no pid\n"); 2274 r = sprintf(buf, "no pid\n");
2268 2275
@@ -2590,6 +2597,38 @@ free:
2590 return ret; 2597 return ret;
2591} 2598}
2592 2599
2600static void
2601ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev,
2602 struct task_struct *next)
2603{
2604 unsigned long long timestamp;
2605 int index;
2606
2607 /*
2608 * Does the user want to count the time a function was asleep.
2609 * If so, do not update the time stamps.
2610 */
2611 if (trace_flags & TRACE_ITER_SLEEP_TIME)
2612 return;
2613
2614 timestamp = trace_clock_local();
2615
2616 prev->ftrace_timestamp = timestamp;
2617
2618 /* only process tasks that we timestamped */
2619 if (!next->ftrace_timestamp)
2620 return;
2621
2622 /*
2623 * Update all the counters in next to make up for the
2624 * time next was sleeping.
2625 */
2626 timestamp -= next->ftrace_timestamp;
2627
2628 for (index = next->curr_ret_stack; index >= 0; index--)
2629 next->ret_stack[index].calltime += timestamp;
2630}
2631
2593/* Allocate a return stack for each task */ 2632/* Allocate a return stack for each task */
2594static int start_graph_tracing(void) 2633static int start_graph_tracing(void)
2595{ 2634{
@@ -2611,6 +2650,13 @@ static int start_graph_tracing(void)
2611 ret = alloc_retstack_tasklist(ret_stack_list); 2650 ret = alloc_retstack_tasklist(ret_stack_list);
2612 } while (ret == -EAGAIN); 2651 } while (ret == -EAGAIN);
2613 2652
2653 if (!ret) {
2654 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch);
2655 if (ret)
2656 pr_info("ftrace_graph: Couldn't activate tracepoint"
2657 " probe to kernel_sched_switch\n");
2658 }
2659
2614 kfree(ret_stack_list); 2660 kfree(ret_stack_list);
2615 return ret; 2661 return ret;
2616} 2662}
@@ -2643,6 +2689,12 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2643 2689
2644 mutex_lock(&ftrace_lock); 2690 mutex_lock(&ftrace_lock);
2645 2691
2692 /* we currently allow only one tracer registered at a time */
2693 if (atomic_read(&ftrace_graph_active)) {
2694 ret = -EBUSY;
2695 goto out;
2696 }
2697
2646 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 2698 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2647 register_pm_notifier(&ftrace_suspend_notifier); 2699 register_pm_notifier(&ftrace_suspend_notifier);
2648 2700
@@ -2668,6 +2720,7 @@ void unregister_ftrace_graph(void)
2668 mutex_lock(&ftrace_lock); 2720 mutex_lock(&ftrace_lock);
2669 2721
2670 atomic_dec(&ftrace_graph_active); 2722 atomic_dec(&ftrace_graph_active);
2723 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
2671 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2724 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2672 ftrace_graph_entry = ftrace_graph_entry_stub; 2725 ftrace_graph_entry = ftrace_graph_entry_stub;
2673 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2726 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
@@ -2688,6 +2741,7 @@ void ftrace_graph_init_task(struct task_struct *t)
2688 t->curr_ret_stack = -1; 2741 t->curr_ret_stack = -1;
2689 atomic_set(&t->tracing_graph_pause, 0); 2742 atomic_set(&t->tracing_graph_pause, 0);
2690 atomic_set(&t->trace_overrun, 0); 2743 atomic_set(&t->trace_overrun, 0);
2744 t->ftrace_timestamp = 0;
2691 } else 2745 } else
2692 t->ret_stack = NULL; 2746 t->ret_stack = NULL;
2693} 2747}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 808b14bbf076..edce2ff38944 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -189,16 +189,65 @@ enum {
189 RB_LEN_TIME_STAMP = 16, 189 RB_LEN_TIME_STAMP = 16,
190}; 190};
191 191
192/* inline for ring buffer fast paths */ 192static inline int rb_null_event(struct ring_buffer_event *event)
193{
194 return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
195}
196
197static inline int rb_discarded_event(struct ring_buffer_event *event)
198{
199 return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
200}
201
202static void rb_event_set_padding(struct ring_buffer_event *event)
203{
204 event->type = RINGBUF_TYPE_PADDING;
205 event->time_delta = 0;
206}
207
208/**
209 * ring_buffer_event_discard - discard an event in the ring buffer
210 * @buffer: the ring buffer
211 * @event: the event to discard
212 *
213 * Sometimes a event that is in the ring buffer needs to be ignored.
214 * This function lets the user discard an event in the ring buffer
215 * and then that event will not be read later.
216 *
217 * Note, it is up to the user to be careful with this, and protect
218 * against races. If the user discards an event that has been consumed
219 * it is possible that it could corrupt the ring buffer.
220 */
221void ring_buffer_event_discard(struct ring_buffer_event *event)
222{
223 event->type = RINGBUF_TYPE_PADDING;
224 /* time delta must be non zero */
225 if (!event->time_delta)
226 event->time_delta = 1;
227}
228
193static unsigned 229static unsigned
194rb_event_length(struct ring_buffer_event *event) 230rb_event_data_length(struct ring_buffer_event *event)
195{ 231{
196 unsigned length; 232 unsigned length;
197 233
234 if (event->len)
235 length = event->len * RB_ALIGNMENT;
236 else
237 length = event->array[0];
238 return length + RB_EVNT_HDR_SIZE;
239}
240
241/* inline for ring buffer fast paths */
242static unsigned
243rb_event_length(struct ring_buffer_event *event)
244{
198 switch (event->type) { 245 switch (event->type) {
199 case RINGBUF_TYPE_PADDING: 246 case RINGBUF_TYPE_PADDING:
200 /* undefined */ 247 if (rb_null_event(event))
201 return -1; 248 /* undefined */
249 return -1;
250 return rb_event_data_length(event);
202 251
203 case RINGBUF_TYPE_TIME_EXTEND: 252 case RINGBUF_TYPE_TIME_EXTEND:
204 return RB_LEN_TIME_EXTEND; 253 return RB_LEN_TIME_EXTEND;
@@ -207,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event)
207 return RB_LEN_TIME_STAMP; 256 return RB_LEN_TIME_STAMP;
208 257
209 case RINGBUF_TYPE_DATA: 258 case RINGBUF_TYPE_DATA:
210 if (event->len) 259 return rb_event_data_length(event);
211 length = event->len * RB_ALIGNMENT;
212 else
213 length = event->array[0];
214 return length + RB_EVNT_HDR_SIZE;
215 default: 260 default:
216 BUG(); 261 BUG();
217 } 262 }
@@ -845,11 +890,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
845} 890}
846EXPORT_SYMBOL_GPL(ring_buffer_resize); 891EXPORT_SYMBOL_GPL(ring_buffer_resize);
847 892
848static inline int rb_null_event(struct ring_buffer_event *event)
849{
850 return event->type == RINGBUF_TYPE_PADDING;
851}
852
853static inline void * 893static inline void *
854__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 894__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
855{ 895{
@@ -1219,7 +1259,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1219 if (tail < BUF_PAGE_SIZE) { 1259 if (tail < BUF_PAGE_SIZE) {
1220 /* Mark the rest of the page with padding */ 1260 /* Mark the rest of the page with padding */
1221 event = __rb_page_index(tail_page, tail); 1261 event = __rb_page_index(tail_page, tail);
1222 event->type = RINGBUF_TYPE_PADDING; 1262 rb_event_set_padding(event);
1223 } 1263 }
1224 1264
1225 if (tail <= BUF_PAGE_SIZE) 1265 if (tail <= BUF_PAGE_SIZE)
@@ -1969,7 +2009,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1969 2009
1970 event = rb_reader_event(cpu_buffer); 2010 event = rb_reader_event(cpu_buffer);
1971 2011
1972 if (event->type == RINGBUF_TYPE_DATA) 2012 if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
1973 cpu_buffer->entries--; 2013 cpu_buffer->entries--;
1974 2014
1975 rb_update_read_stamp(cpu_buffer, event); 2015 rb_update_read_stamp(cpu_buffer, event);
@@ -2052,9 +2092,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2052 2092
2053 switch (event->type) { 2093 switch (event->type) {
2054 case RINGBUF_TYPE_PADDING: 2094 case RINGBUF_TYPE_PADDING:
2055 RB_WARN_ON(cpu_buffer, 1); 2095 if (rb_null_event(event))
2096 RB_WARN_ON(cpu_buffer, 1);
2097 /*
2098 * Because the writer could be discarding every
2099 * event it creates (which would probably be bad)
2100 * if we were to go back to "again" then we may never
2101 * catch up, and will trigger the warn on, or lock
2102 * the box. Return the padding, and we will release
2103 * the current locks, and try again.
2104 */
2056 rb_advance_reader(cpu_buffer); 2105 rb_advance_reader(cpu_buffer);
2057 return NULL; 2106 return event;
2058 2107
2059 case RINGBUF_TYPE_TIME_EXTEND: 2108 case RINGBUF_TYPE_TIME_EXTEND:
2060 /* Internal data, OK to advance */ 2109 /* Internal data, OK to advance */
@@ -2115,8 +2164,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2115 2164
2116 switch (event->type) { 2165 switch (event->type) {
2117 case RINGBUF_TYPE_PADDING: 2166 case RINGBUF_TYPE_PADDING:
2118 rb_inc_iter(iter); 2167 if (rb_null_event(event)) {
2119 goto again; 2168 rb_inc_iter(iter);
2169 goto again;
2170 }
2171 rb_advance_iter(iter);
2172 return event;
2120 2173
2121 case RINGBUF_TYPE_TIME_EXTEND: 2174 case RINGBUF_TYPE_TIME_EXTEND:
2122 /* Internal data, OK to advance */ 2175 /* Internal data, OK to advance */
@@ -2163,10 +2216,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2163 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2216 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2164 return NULL; 2217 return NULL;
2165 2218
2219 again:
2166 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2220 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2167 event = rb_buffer_peek(buffer, cpu, ts); 2221 event = rb_buffer_peek(buffer, cpu, ts);
2168 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2222 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2169 2223
2224 if (event && event->type == RINGBUF_TYPE_PADDING) {
2225 cpu_relax();
2226 goto again;
2227 }
2228
2170 return event; 2229 return event;
2171} 2230}
2172 2231
@@ -2185,10 +2244,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2185 struct ring_buffer_event *event; 2244 struct ring_buffer_event *event;
2186 unsigned long flags; 2245 unsigned long flags;
2187 2246
2247 again:
2188 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2248 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2189 event = rb_iter_peek(iter, ts); 2249 event = rb_iter_peek(iter, ts);
2190 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2250 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2191 2251
2252 if (event && event->type == RINGBUF_TYPE_PADDING) {
2253 cpu_relax();
2254 goto again;
2255 }
2256
2192 return event; 2257 return event;
2193} 2258}
2194 2259
@@ -2207,6 +2272,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2207 struct ring_buffer_event *event = NULL; 2272 struct ring_buffer_event *event = NULL;
2208 unsigned long flags; 2273 unsigned long flags;
2209 2274
2275 again:
2210 /* might be called in atomic */ 2276 /* might be called in atomic */
2211 preempt_disable(); 2277 preempt_disable();
2212 2278
@@ -2228,6 +2294,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2228 out: 2294 out:
2229 preempt_enable(); 2295 preempt_enable();
2230 2296
2297 if (event && event->type == RINGBUF_TYPE_PADDING) {
2298 cpu_relax();
2299 goto again;
2300 }
2301
2231 return event; 2302 return event;
2232} 2303}
2233EXPORT_SYMBOL_GPL(ring_buffer_consume); 2304EXPORT_SYMBOL_GPL(ring_buffer_consume);
@@ -2306,6 +2377,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2306 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2377 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2307 unsigned long flags; 2378 unsigned long flags;
2308 2379
2380 again:
2309 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2381 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2310 event = rb_iter_peek(iter, ts); 2382 event = rb_iter_peek(iter, ts);
2311 if (!event) 2383 if (!event)
@@ -2315,6 +2387,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2315 out: 2387 out:
2316 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2388 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2317 2389
2390 if (event && event->type == RINGBUF_TYPE_PADDING) {
2391 cpu_relax();
2392 goto again;
2393 }
2394
2318 return event; 2395 return event;
2319} 2396}
2320EXPORT_SYMBOL_GPL(ring_buffer_read); 2397EXPORT_SYMBOL_GPL(ring_buffer_read);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e6fac0ffe6f0..a0174a40c563 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -255,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
255 255
256/* trace_flags holds trace_options default values */ 256/* trace_flags holds trace_options default values */
257unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 257unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; 258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
259 259
260/** 260/**
261 * trace_wake_up - wake up tasks waiting for trace input 261 * trace_wake_up - wake up tasks waiting for trace input
@@ -316,6 +316,7 @@ static const char *trace_options[] = {
316 "context-info", 316 "context-info",
317 "latency-format", 317 "latency-format",
318 "global-clock", 318 "global-clock",
319 "sleep-time",
319 NULL 320 NULL
320}; 321};
321 322
@@ -382,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
382 return cnt; 383 return cnt;
383} 384}
384 385
385ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 386static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
386{ 387{
387 int len; 388 int len;
388 void *ret; 389 void *ret;
@@ -860,15 +861,25 @@ static void ftrace_trace_stack(struct trace_array *tr,
860static void ftrace_trace_userstack(struct trace_array *tr, 861static void ftrace_trace_userstack(struct trace_array *tr,
861 unsigned long flags, int pc); 862 unsigned long flags, int pc);
862 863
863void trace_buffer_unlock_commit(struct trace_array *tr, 864static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
864 struct ring_buffer_event *event, 865 struct ring_buffer_event *event,
865 unsigned long flags, int pc) 866 unsigned long flags, int pc,
867 int wake)
866{ 868{
867 ring_buffer_unlock_commit(tr->buffer, event); 869 ring_buffer_unlock_commit(tr->buffer, event);
868 870
869 ftrace_trace_stack(tr, flags, 6, pc); 871 ftrace_trace_stack(tr, flags, 6, pc);
870 ftrace_trace_userstack(tr, flags, pc); 872 ftrace_trace_userstack(tr, flags, pc);
871 trace_wake_up(); 873
874 if (wake)
875 trace_wake_up();
876}
877
878void trace_buffer_unlock_commit(struct trace_array *tr,
879 struct ring_buffer_event *event,
880 unsigned long flags, int pc)
881{
882 __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
872} 883}
873 884
874struct ring_buffer_event * 885struct ring_buffer_event *
@@ -882,7 +893,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
882void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 893void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
883 unsigned long flags, int pc) 894 unsigned long flags, int pc)
884{ 895{
885 return trace_buffer_unlock_commit(&global_trace, event, flags, pc); 896 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
897}
898
899void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
900 unsigned long flags, int pc)
901{
902 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
886} 903}
887 904
888void 905void
@@ -908,7 +925,7 @@ trace_function(struct trace_array *tr,
908} 925}
909 926
910#ifdef CONFIG_FUNCTION_GRAPH_TRACER 927#ifdef CONFIG_FUNCTION_GRAPH_TRACER
911static void __trace_graph_entry(struct trace_array *tr, 928static int __trace_graph_entry(struct trace_array *tr,
912 struct ftrace_graph_ent *trace, 929 struct ftrace_graph_ent *trace,
913 unsigned long flags, 930 unsigned long flags,
914 int pc) 931 int pc)
@@ -917,15 +934,17 @@ static void __trace_graph_entry(struct trace_array *tr,
917 struct ftrace_graph_ent_entry *entry; 934 struct ftrace_graph_ent_entry *entry;
918 935
919 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 936 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
920 return; 937 return 0;
921 938
922 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, 939 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
923 sizeof(*entry), flags, pc); 940 sizeof(*entry), flags, pc);
924 if (!event) 941 if (!event)
925 return; 942 return 0;
926 entry = ring_buffer_event_data(event); 943 entry = ring_buffer_event_data(event);
927 entry->graph_ent = *trace; 944 entry->graph_ent = *trace;
928 ring_buffer_unlock_commit(global_trace.buffer, event); 945 ring_buffer_unlock_commit(global_trace.buffer, event);
946
947 return 1;
929} 948}
930 949
931static void __trace_graph_return(struct trace_array *tr, 950static void __trace_graph_return(struct trace_array *tr,
@@ -1146,6 +1165,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1146 struct trace_array_cpu *data; 1165 struct trace_array_cpu *data;
1147 unsigned long flags; 1166 unsigned long flags;
1148 long disabled; 1167 long disabled;
1168 int ret;
1149 int cpu; 1169 int cpu;
1150 int pc; 1170 int pc;
1151 1171
@@ -1161,15 +1181,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1161 disabled = atomic_inc_return(&data->disabled); 1181 disabled = atomic_inc_return(&data->disabled);
1162 if (likely(disabled == 1)) { 1182 if (likely(disabled == 1)) {
1163 pc = preempt_count(); 1183 pc = preempt_count();
1164 __trace_graph_entry(tr, trace, flags, pc); 1184 ret = __trace_graph_entry(tr, trace, flags, pc);
1185 } else {
1186 ret = 0;
1165 } 1187 }
1166 /* Only do the atomic if it is not already set */ 1188 /* Only do the atomic if it is not already set */
1167 if (!test_tsk_trace_graph(current)) 1189 if (!test_tsk_trace_graph(current))
1168 set_tsk_trace_graph(current); 1190 set_tsk_trace_graph(current);
1191
1169 atomic_dec(&data->disabled); 1192 atomic_dec(&data->disabled);
1170 local_irq_restore(flags); 1193 local_irq_restore(flags);
1171 1194
1172 return 1; 1195 return ret;
1173} 1196}
1174 1197
1175void trace_graph_return(struct ftrace_graph_ret *trace) 1198void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -3513,6 +3536,9 @@ struct dentry *tracing_init_dentry(void)
3513 if (d_tracer) 3536 if (d_tracer)
3514 return d_tracer; 3537 return d_tracer;
3515 3538
3539 if (!debugfs_initialized())
3540 return NULL;
3541
3516 d_tracer = debugfs_create_dir("tracing", NULL); 3542 d_tracer = debugfs_create_dir("tracing", NULL);
3517 3543
3518 if (!d_tracer && !once) { 3544 if (!d_tracer && !once) {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7cfb741be200..cb0ce3fc36d3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -483,6 +483,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
483 unsigned long flags, int pc); 483 unsigned long flags, int pc);
484void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 484void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
485 unsigned long flags, int pc); 485 unsigned long flags, int pc);
486void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
487 unsigned long flags, int pc);
486 488
487struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 489struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
488 struct trace_array_cpu *data); 490 struct trace_array_cpu *data);
@@ -683,6 +685,7 @@ enum trace_iterator_flags {
683 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 685 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
684 TRACE_ITER_LATENCY_FMT = 0x40000, 686 TRACE_ITER_LATENCY_FMT = 0x40000,
685 TRACE_ITER_GLOBAL_CLK = 0x80000, 687 TRACE_ITER_GLOBAL_CLK = 0x80000,
688 TRACE_ITER_SLEEP_TIME = 0x100000,
686}; 689};
687 690
688/* 691/*
@@ -775,16 +778,27 @@ enum {
775 TRACE_EVENT_TYPE_RAW = 2, 778 TRACE_EVENT_TYPE_RAW = 2,
776}; 779};
777 780
781struct ftrace_event_field {
782 struct list_head link;
783 char *name;
784 char *type;
785 int offset;
786 int size;
787};
788
778struct ftrace_event_call { 789struct ftrace_event_call {
779 char *name; 790 char *name;
780 char *system; 791 char *system;
781 struct dentry *dir; 792 struct dentry *dir;
782 int enabled; 793 int enabled;
783 int (*regfunc)(void); 794 int (*regfunc)(void);
784 void (*unregfunc)(void); 795 void (*unregfunc)(void);
785 int id; 796 int id;
786 int (*raw_init)(void); 797 int (*raw_init)(void);
787 int (*show_format)(struct trace_seq *s); 798 int (*show_format)(struct trace_seq *s);
799 int (*define_fields)(void);
800 struct list_head fields;
801 struct filter_pred **preds;
788 802
789#ifdef CONFIG_EVENT_PROFILE 803#ifdef CONFIG_EVENT_PROFILE
790 atomic_t profile_count; 804 atomic_t profile_count;
@@ -793,6 +807,51 @@ struct ftrace_event_call {
793#endif 807#endif
794}; 808};
795 809
810struct event_subsystem {
811 struct list_head list;
812 const char *name;
813 struct dentry *entry;
814 struct filter_pred **preds;
815};
816
/*
 * Walk every ftrace_event_call placed between __start_ftrace_events and
 * __stop_ftrace_events.
 */
#define events_for_each(event)						\
	for ((event) = __start_ftrace_events;				\
	     (unsigned long)(event) < (unsigned long)__stop_ftrace_events; \
	     (event)++)

/* Number of predicate slots in a filter's preds array. */
#define MAX_FILTER_PRED 8

struct filter_pred;

/* Predicate callback: non-zero when @event satisfies @pred. */
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
827
828struct filter_pred {
829 filter_pred_fn_t fn;
830 u64 val;
831 char *str_val;
832 int str_len;
833 char *field_name;
834 int offset;
835 int not;
836 int or;
837 int compound;
838 int clear;
839};
840
/* Event field registration. */
extern int trace_define_field(struct ftrace_event_call *call, char *type,
			      char *name, int offset, int size);

/* Predicate parsing, printing and lifetime. */
extern int filter_parse(char **pbuf, struct filter_pred *pred);
extern void filter_print_preds(struct filter_pred **preds,
			       struct trace_seq *s);
extern void filter_free_pred(struct filter_pred *pred);

/* Per-event filters. */
extern int filter_add_pred(struct ftrace_event_call *call,
			   struct filter_pred *pred);
extern void filter_free_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec);

/* Per-subsystem filters. */
extern int filter_add_subsystem_pred(struct event_subsystem *system,
				     struct filter_pred *pred);
extern void filter_free_subsystem_preds(struct event_subsystem *system);
854
796void event_trace_printk(unsigned long ip, const char *fmt, ...); 855void event_trace_printk(unsigned long ip, const char *fmt, ...);
797extern struct ftrace_event_call __start_ftrace_events[]; 856extern struct ftrace_event_call __start_ftrace_events[];
798extern struct ftrace_event_call __stop_ftrace_events[]; 857extern struct ftrace_event_call __stop_ftrace_events[];
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 05b176abfd30..b588fd81f7f9 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -18,6 +18,7 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/ktime.h> 20#include <linux/ktime.h>
21#include <linux/trace_clock.h>
21 22
22/* 23/*
23 * trace_clock_local(): the simplest and least coherent tracing clock. 24 * trace_clock_local(): the simplest and least coherent tracing clock.
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3047b56f6637..64ec4d278ffb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -19,6 +19,39 @@
19 19
20static DEFINE_MUTEX(event_mutex); 20static DEFINE_MUTEX(event_mutex);
21 21
22int trace_define_field(struct ftrace_event_call *call, char *type,
23 char *name, int offset, int size)
24{
25 struct ftrace_event_field *field;
26
27 field = kzalloc(sizeof(*field), GFP_KERNEL);
28 if (!field)
29 goto err;
30
31 field->name = kstrdup(name, GFP_KERNEL);
32 if (!field->name)
33 goto err;
34
35 field->type = kstrdup(type, GFP_KERNEL);
36 if (!field->type)
37 goto err;
38
39 field->offset = offset;
40 field->size = size;
41 list_add(&field->link, &call->fields);
42
43 return 0;
44
45err:
46 if (field) {
47 kfree(field->name);
48 kfree(field->type);
49 }
50 kfree(field);
51
52 return -ENOMEM;
53}
54
22static void ftrace_clear_events(void) 55static void ftrace_clear_events(void)
23{ 56{
24 struct ftrace_event_call *call = (void *)__start_ftrace_events; 57 struct ftrace_event_call *call = (void *)__start_ftrace_events;
@@ -343,7 +376,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
343 376
344#undef FIELD 377#undef FIELD
345#define FIELD(type, name) \ 378#define FIELD(type, name) \
346 #type, #name, offsetof(typeof(field), name), sizeof(field.name) 379 #type, "common_" #name, offsetof(typeof(field), name), \
380 sizeof(field.name)
347 381
348static int trace_write_header(struct trace_seq *s) 382static int trace_write_header(struct trace_seq *s)
349{ 383{
@@ -430,6 +464,139 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
430 return r; 464 return r;
431} 465}
432 466
467static ssize_t
468event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
469 loff_t *ppos)
470{
471 struct ftrace_event_call *call = filp->private_data;
472 struct trace_seq *s;
473 int r;
474
475 if (*ppos)
476 return 0;
477
478 s = kmalloc(sizeof(*s), GFP_KERNEL);
479 if (!s)
480 return -ENOMEM;
481
482 trace_seq_init(s);
483
484 filter_print_preds(call->preds, s);
485 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
486
487 kfree(s);
488
489 return r;
490}
491
492static ssize_t
493event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
494 loff_t *ppos)
495{
496 struct ftrace_event_call *call = filp->private_data;
497 char buf[64], *pbuf = buf;
498 struct filter_pred *pred;
499 int err;
500
501 if (cnt >= sizeof(buf))
502 return -EINVAL;
503
504 if (copy_from_user(&buf, ubuf, cnt))
505 return -EFAULT;
506
507 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
508 if (!pred)
509 return -ENOMEM;
510
511 err = filter_parse(&pbuf, pred);
512 if (err < 0) {
513 filter_free_pred(pred);
514 return err;
515 }
516
517 if (pred->clear) {
518 filter_free_preds(call);
519 filter_free_pred(pred);
520 return cnt;
521 }
522
523 if (filter_add_pred(call, pred)) {
524 filter_free_pred(pred);
525 return -EINVAL;
526 }
527
528 *ppos += cnt;
529
530 return cnt;
531}
532
533static ssize_t
534subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
535 loff_t *ppos)
536{
537 struct event_subsystem *system = filp->private_data;
538 struct trace_seq *s;
539 int r;
540
541 if (*ppos)
542 return 0;
543
544 s = kmalloc(sizeof(*s), GFP_KERNEL);
545 if (!s)
546 return -ENOMEM;
547
548 trace_seq_init(s);
549
550 filter_print_preds(system->preds, s);
551 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
552
553 kfree(s);
554
555 return r;
556}
557
558static ssize_t
559subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
560 loff_t *ppos)
561{
562 struct event_subsystem *system = filp->private_data;
563 char buf[64], *pbuf = buf;
564 struct filter_pred *pred;
565 int err;
566
567 if (cnt >= sizeof(buf))
568 return -EINVAL;
569
570 if (copy_from_user(&buf, ubuf, cnt))
571 return -EFAULT;
572
573 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
574 if (!pred)
575 return -ENOMEM;
576
577 err = filter_parse(&pbuf, pred);
578 if (err < 0) {
579 filter_free_pred(pred);
580 return err;
581 }
582
583 if (pred->clear) {
584 filter_free_subsystem_preds(system);
585 filter_free_pred(pred);
586 return cnt;
587 }
588
589 if (filter_add_subsystem_pred(system, pred)) {
590 filter_free_subsystem_preds(system);
591 filter_free_pred(pred);
592 return -EINVAL;
593 }
594
595 *ppos += cnt;
596
597 return cnt;
598}
599
433static const struct seq_operations show_event_seq_ops = { 600static const struct seq_operations show_event_seq_ops = {
434 .start = t_start, 601 .start = t_start,
435 .next = t_next, 602 .next = t_next,
@@ -475,6 +642,18 @@ static const struct file_operations ftrace_event_id_fops = {
475 .read = event_id_read, 642 .read = event_id_read,
476}; 643};
477 644
645static const struct file_operations ftrace_event_filter_fops = {
646 .open = tracing_open_generic,
647 .read = event_filter_read,
648 .write = event_filter_write,
649};
650
651static const struct file_operations ftrace_subsystem_filter_fops = {
652 .open = tracing_open_generic,
653 .read = subsystem_filter_read,
654 .write = subsystem_filter_write,
655};
656
478static struct dentry *event_trace_events_dir(void) 657static struct dentry *event_trace_events_dir(void)
479{ 658{
480 static struct dentry *d_tracer; 659 static struct dentry *d_tracer;
@@ -495,12 +674,6 @@ static struct dentry *event_trace_events_dir(void)
495 return d_events; 674 return d_events;
496} 675}
497 676
498struct event_subsystem {
499 struct list_head list;
500 const char *name;
501 struct dentry *entry;
502};
503
504static LIST_HEAD(event_subsystems); 677static LIST_HEAD(event_subsystems);
505 678
506static struct dentry * 679static struct dentry *
@@ -533,6 +706,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
533 system->name = name; 706 system->name = name;
534 list_add(&system->list, &event_subsystems); 707 list_add(&system->list, &event_subsystems);
535 708
709 system->preds = NULL;
710
536 return system->entry; 711 return system->entry;
537} 712}
538 713
@@ -581,6 +756,20 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
581 call->name); 756 call->name);
582 } 757 }
583 758
759 if (call->define_fields) {
760 ret = call->define_fields();
761 if (ret < 0) {
762 pr_warning("Could not initialize trace point"
763 " events/%s\n", call->name);
764 return ret;
765 }
766 entry = debugfs_create_file("filter", 0644, call->dir, call,
767 &ftrace_event_filter_fops);
768 if (!entry)
769 pr_warning("Could not create debugfs "
770 "'%s/filter' entry\n", call->name);
771 }
772
584 /* A trace may not want to export its format */ 773 /* A trace may not want to export its format */
585 if (!call->show_format) 774 if (!call->show_format)
586 return 0; 775 return 0;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
new file mode 100644
index 000000000000..026be412f356
--- /dev/null
+++ b/kernel/trace/trace_events_filter.c
@@ -0,0 +1,427 @@
1/*
2 * trace_events_filter - generic event filtering
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */
20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h>
24#include <linux/ctype.h>
25
26#include "trace.h"
27#include "trace_output.h"
28
29static int filter_pred_64(struct filter_pred *pred, void *event)
30{
31 u64 *addr = (u64 *)(event + pred->offset);
32 u64 val = (u64)pred->val;
33 int match;
34
35 match = (val == *addr) ^ pred->not;
36
37 return match;
38}
39
40static int filter_pred_32(struct filter_pred *pred, void *event)
41{
42 u32 *addr = (u32 *)(event + pred->offset);
43 u32 val = (u32)pred->val;
44 int match;
45
46 match = (val == *addr) ^ pred->not;
47
48 return match;
49}
50
51static int filter_pred_16(struct filter_pred *pred, void *event)
52{
53 u16 *addr = (u16 *)(event + pred->offset);
54 u16 val = (u16)pred->val;
55 int match;
56
57 match = (val == *addr) ^ pred->not;
58
59 return match;
60}
61
62static int filter_pred_8(struct filter_pred *pred, void *event)
63{
64 u8 *addr = (u8 *)(event + pred->offset);
65 u8 val = (u8)pred->val;
66 int match;
67
68 match = (val == *addr) ^ pred->not;
69
70 return match;
71}
72
73static int filter_pred_string(struct filter_pred *pred, void *event)
74{
75 char *addr = (char *)(event + pred->offset);
76 int cmp, match;
77
78 cmp = strncmp(addr, pred->str_val, pred->str_len);
79
80 match = (!cmp) ^ pred->not;
81
82 return match;
83}
84
85/* return 1 if event matches, 0 otherwise (discard) */
86int filter_match_preds(struct ftrace_event_call *call, void *rec)
87{
88 int i, matched, and_failed = 0;
89 struct filter_pred *pred;
90
91 for (i = 0; i < MAX_FILTER_PRED; i++) {
92 if (call->preds[i]) {
93 pred = call->preds[i];
94 if (and_failed && !pred->or)
95 continue;
96 matched = pred->fn(pred, rec);
97 if (!matched && !pred->or) {
98 and_failed = 1;
99 continue;
100 } else if (matched && pred->or)
101 return 1;
102 } else
103 break;
104 }
105
106 if (and_failed)
107 return 0;
108
109 return 1;
110}
111
112void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
113{
114 char *field_name;
115 struct filter_pred *pred;
116 int i;
117
118 if (!preds) {
119 trace_seq_printf(s, "none\n");
120 return;
121 }
122
123 for (i = 0; i < MAX_FILTER_PRED; i++) {
124 if (preds[i]) {
125 pred = preds[i];
126 field_name = pred->field_name;
127 if (i)
128 trace_seq_printf(s, pred->or ? "|| " : "&& ");
129 trace_seq_printf(s, "%s ", field_name);
130 trace_seq_printf(s, pred->not ? "!= " : "== ");
131 if (pred->str_val)
132 trace_seq_printf(s, "%s\n", pred->str_val);
133 else
134 trace_seq_printf(s, "%llu\n", pred->val);
135 } else
136 break;
137 }
138}
139
140static struct ftrace_event_field *
141find_event_field(struct ftrace_event_call *call, char *name)
142{
143 struct ftrace_event_field *field;
144
145 list_for_each_entry(field, &call->fields, link) {
146 if (!strcmp(field->name, name))
147 return field;
148 }
149
150 return NULL;
151}
152
153void filter_free_pred(struct filter_pred *pred)
154{
155 if (!pred)
156 return;
157
158 kfree(pred->field_name);
159 kfree(pred->str_val);
160 kfree(pred);
161}
162
163void filter_free_preds(struct ftrace_event_call *call)
164{
165 int i;
166
167 if (call->preds) {
168 for (i = 0; i < MAX_FILTER_PRED; i++)
169 filter_free_pred(call->preds[i]);
170 kfree(call->preds);
171 call->preds = NULL;
172 }
173}
174
175void filter_free_subsystem_preds(struct event_subsystem *system)
176{
177 struct ftrace_event_call *call = __start_ftrace_events;
178 int i;
179
180 if (system->preds) {
181 for (i = 0; i < MAX_FILTER_PRED; i++)
182 filter_free_pred(system->preds[i]);
183 kfree(system->preds);
184 system->preds = NULL;
185 }
186
187 events_for_each(call) {
188 if (!call->name || !call->regfunc)
189 continue;
190
191 if (!strcmp(call->system, system->name))
192 filter_free_preds(call);
193 }
194}
195
196static int __filter_add_pred(struct ftrace_event_call *call,
197 struct filter_pred *pred)
198{
199 int i;
200
201 if (call->preds && !pred->compound)
202 filter_free_preds(call);
203
204 if (!call->preds) {
205 call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
206 GFP_KERNEL);
207 if (!call->preds)
208 return -ENOMEM;
209 }
210
211 for (i = 0; i < MAX_FILTER_PRED; i++) {
212 if (!call->preds[i]) {
213 call->preds[i] = pred;
214 return 0;
215 }
216 }
217
218 return -ENOMEM;
219}
220
/*
 * Return 1 when the type text @type contains both a '[' and the substring
 * "char" (e.g. "char[16]"), 0 otherwise.
 */
static int is_string_field(const char *type)
{
	return strchr(type, '[') != NULL && strstr(type, "char") != NULL;
}
228
229int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
230{
231 struct ftrace_event_field *field;
232
233 field = find_event_field(call, pred->field_name);
234 if (!field)
235 return -EINVAL;
236
237 pred->offset = field->offset;
238
239 if (is_string_field(field->type)) {
240 if (!pred->str_val)
241 return -EINVAL;
242 pred->fn = filter_pred_string;
243 pred->str_len = field->size;
244 return __filter_add_pred(call, pred);
245 } else {
246 if (pred->str_val)
247 return -EINVAL;
248 }
249
250 switch (field->size) {
251 case 8:
252 pred->fn = filter_pred_64;
253 break;
254 case 4:
255 pred->fn = filter_pred_32;
256 break;
257 case 2:
258 pred->fn = filter_pred_16;
259 break;
260 case 1:
261 pred->fn = filter_pred_8;
262 break;
263 default:
264 return -EINVAL;
265 }
266
267 return __filter_add_pred(call, pred);
268}
269
270static struct filter_pred *copy_pred(struct filter_pred *pred)
271{
272 struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
273 if (!new_pred)
274 return NULL;
275
276 memcpy(new_pred, pred, sizeof(*pred));
277
278 if (pred->field_name) {
279 new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
280 if (!new_pred->field_name) {
281 kfree(new_pred);
282 return NULL;
283 }
284 }
285
286 if (pred->str_val) {
287 new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
288 if (!new_pred->str_val) {
289 filter_free_pred(new_pred);
290 return NULL;
291 }
292 }
293
294 return new_pred;
295}
296
297int filter_add_subsystem_pred(struct event_subsystem *system,
298 struct filter_pred *pred)
299{
300 struct ftrace_event_call *call = __start_ftrace_events;
301 struct filter_pred *event_pred;
302 int i;
303
304 if (system->preds && !pred->compound)
305 filter_free_subsystem_preds(system);
306
307 if (!system->preds) {
308 system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
309 GFP_KERNEL);
310 if (!system->preds)
311 return -ENOMEM;
312 }
313
314 for (i = 0; i < MAX_FILTER_PRED; i++) {
315 if (!system->preds[i]) {
316 system->preds[i] = pred;
317 break;
318 }
319 }
320
321 if (i == MAX_FILTER_PRED)
322 return -EINVAL;
323
324 events_for_each(call) {
325 int err;
326
327 if (!call->name || !call->regfunc)
328 continue;
329
330 if (strcmp(call->system, system->name))
331 continue;
332
333 if (!find_event_field(call, pred->field_name))
334 continue;
335
336 event_pred = copy_pred(pred);
337 if (!event_pred)
338 goto oom;
339
340 err = filter_add_pred(call, event_pred);
341 if (err)
342 filter_free_pred(event_pred);
343 if (err == -ENOMEM)
344 goto oom;
345 }
346
347 return 0;
348
349oom:
350 system->preds[i] = NULL;
351 return -ENOMEM;
352}
353
354int filter_parse(char **pbuf, struct filter_pred *pred)
355{
356 char *tmp, *tok, *val_str = NULL;
357 int tok_n = 0;
358
359 /* field ==/!= number, or/and field ==/!= number, number */
360 while ((tok = strsep(pbuf, " \n"))) {
361 if (tok_n == 0) {
362 if (!strcmp(tok, "0")) {
363 pred->clear = 1;
364 return 0;
365 } else if (!strcmp(tok, "&&")) {
366 pred->or = 0;
367 pred->compound = 1;
368 } else if (!strcmp(tok, "||")) {
369 pred->or = 1;
370 pred->compound = 1;
371 } else
372 pred->field_name = tok;
373 tok_n = 1;
374 continue;
375 }
376 if (tok_n == 1) {
377 if (!pred->field_name)
378 pred->field_name = tok;
379 else if (!strcmp(tok, "!="))
380 pred->not = 1;
381 else if (!strcmp(tok, "=="))
382 pred->not = 0;
383 else {
384 pred->field_name = NULL;
385 return -EINVAL;
386 }
387 tok_n = 2;
388 continue;
389 }
390 if (tok_n == 2) {
391 if (pred->compound) {
392 if (!strcmp(tok, "!="))
393 pred->not = 1;
394 else if (!strcmp(tok, "=="))
395 pred->not = 0;
396 else {
397 pred->field_name = NULL;
398 return -EINVAL;
399 }
400 } else {
401 val_str = tok;
402 break; /* done */
403 }
404 tok_n = 3;
405 continue;
406 }
407 if (tok_n == 3) {
408 val_str = tok;
409 break; /* done */
410 }
411 }
412
413 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
414 if (!pred->field_name)
415 return -ENOMEM;
416
417 pred->val = simple_strtoull(val_str, &tmp, 10);
418 if (tmp == val_str) {
419 pred->str_val = kstrdup(val_str, GFP_KERNEL);
420 if (!pred->str_val)
421 return -ENOMEM;
422 }
423
424 return 0;
425}
426
427
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 5117c43f5c67..30743f7d4110 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \
129} 129}
130 130
131#include <trace/trace_event_types.h> 131#include <trace/trace_event_types.h>
132
/*
 * Field helpers for the define_fields pass.  Each one registers a field
 * with trace_define_field() and returns from the enclosing function if
 * that fails.  They rely on `ret`, `field` and `event_call` being declared
 * by the enclosing function.
 */

/* plain field: type and name are passed as their literal text */
#undef __field
#define __field(type, item)						\
	ret = trace_define_field(event_call, #type, #item,		\
				 offsetof(typeof(field), item),		\
				 sizeof(field.item));			\
	if (ret)							\
		return ret;

/* array field: the type text gets a "[len]" suffix */
#undef __array
#define __array(type, item, len)					\
	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
				 offsetof(typeof(field), item),		\
				 sizeof(field.item));			\
	if (ret)							\
		return ret;

/* member of field.ent: the name gets a "common_" prefix */
#define __common_field(type, item)					\
	ret = trace_define_field(event_call, #type, "common_" #item,	\
				 offsetof(typeof(field.ent), item),	\
				 sizeof(field.ent.item));		\
	if (ret)							\
		return ret;
155
/*
 * For every TRACE_EVENT, generate ftrace_define_fields_<call>().  It
 * registers the five common fields and then whatever fields `tstruct`
 * expands to, stopping at the first failure.  Returns 0 on success or the
 * error from trace_define_field().
 */
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
int									\
ftrace_define_fields_##call(void)					\
{									\
	struct ftrace_raw_##call field;					\
	struct ftrace_event_call *event_call = &event_##call;		\
	int ret;							\
									\
	__common_field(unsigned char, type);				\
	__common_field(unsigned char, flags);				\
	__common_field(unsigned char, preempt_count);			\
	__common_field(int, pid);					\
	__common_field(int, tgid);					\
									\
	tstruct;							\
									\
	return ret;							\
}
175
176#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 6b3261ca988c..9d2fa78cecca 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -204,6 +204,7 @@ static struct ftrace_event_call event_##call; \
204 \ 204 \
205static void ftrace_raw_event_##call(proto) \ 205static void ftrace_raw_event_##call(proto) \
206{ \ 206{ \
207 struct ftrace_event_call *call = &event_##call; \
207 struct ring_buffer_event *event; \ 208 struct ring_buffer_event *event; \
208 struct ftrace_raw_##call *entry; \ 209 struct ftrace_raw_##call *entry; \
209 unsigned long irq_flags; \ 210 unsigned long irq_flags; \
@@ -221,7 +222,11 @@ static void ftrace_raw_event_##call(proto) \
221 \ 222 \
222 assign; \ 223 assign; \
223 \ 224 \
224 trace_current_buffer_unlock_commit(event, irq_flags, pc); \ 225 if (call->preds && !filter_match_preds(call, entry)) \
226 ring_buffer_event_discard(event); \
227 \
228 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
229 \
225} \ 230} \
226 \ 231 \
227static int ftrace_raw_reg_event_##call(void) \ 232static int ftrace_raw_reg_event_##call(void) \
@@ -252,6 +257,7 @@ static int ftrace_raw_init_event_##call(void) \
252 if (!id) \ 257 if (!id) \
253 return -ENODEV; \ 258 return -ENODEV; \
254 event_##call.id = id; \ 259 event_##call.id = id; \
260 INIT_LIST_HEAD(&event_##call.fields); \
255 return 0; \ 261 return 0; \
256} \ 262} \
257 \ 263 \
@@ -264,6 +270,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
264 .regfunc = ftrace_raw_reg_event_##call, \ 270 .regfunc = ftrace_raw_reg_event_##call, \
265 .unregfunc = ftrace_raw_unreg_event_##call, \ 271 .unregfunc = ftrace_raw_unreg_event_##call, \
266 .show_format = ftrace_format_##call, \ 272 .show_format = ftrace_format_##call, \
273 .define_fields = ftrace_define_fields_##call, \
267 _TRACE_PROFILE_INIT(call) \ 274 _TRACE_PROFILE_INIT(call) \
268} 275}
269 276
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e876816fa8e7..d28687e7b3a7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,9 +57,9 @@ static struct tracer_flags tracer_flags = {
57 57
58/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
59int 59int
60ftrace_push_return_trace(unsigned long ret, unsigned long long time, 60ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
61 unsigned long func, int *depth)
62{ 61{
62 unsigned long long calltime;
63 int index; 63 int index;
64 64
65 if (!current->ret_stack) 65 if (!current->ret_stack)
@@ -71,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time,
71 return -EBUSY; 71 return -EBUSY;
72 } 72 }
73 73
74 calltime = trace_clock_local();
75
74 index = ++current->curr_ret_stack; 76 index = ++current->curr_ret_stack;
75 barrier(); 77 barrier();
76 current->ret_stack[index].ret = ret; 78 current->ret_stack[index].ret = ret;
77 current->ret_stack[index].func = func; 79 current->ret_stack[index].func = func;
78 current->ret_stack[index].calltime = time; 80 current->ret_stack[index].calltime = calltime;
79 *depth = index; 81 *depth = index;
80 82
81 return 0; 83 return 0;
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 9aa84bde23cd..394f94417e2f 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -91,6 +91,7 @@ struct tracer nop_trace __read_mostly =
91 .name = "nop", 91 .name = "nop",
92 .init = nop_trace_init, 92 .init = nop_trace_init,
93 .reset = nop_trace_reset, 93 .reset = nop_trace_reset,
94 .wait_pipe = poll_wait_pipe,
94#ifdef CONFIG_FTRACE_SELFTEST 95#ifdef CONFIG_FTRACE_SELFTEST
95 .selftest = trace_selftest_startup_nop, 96 .selftest = trace_selftest_startup_nop,
96#endif 97#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 19261fdd2455..d72b9a63b247 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -137,7 +137,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
137 return 1; 137 return 1;
138} 138}
139 139
140int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) 140int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
141{ 141{
142 if (len > ((PAGE_SIZE - 1) - s->len)) 142 if (len > ((PAGE_SIZE - 1) - s->len))
143 return 0; 143 return 0;
@@ -148,10 +148,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
148 return len; 148 return len;
149} 149}
150 150
151int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 151int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
152{ 152{
153 unsigned char hex[HEX_CHARS]; 153 unsigned char hex[HEX_CHARS];
154 unsigned char *data = mem; 154 const unsigned char *data = mem;
155 int i, j; 155 int i, j;
156 156
157#ifdef __BIG_ENDIAN 157#ifdef __BIG_ENDIAN
@@ -167,6 +167,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
167 return trace_seq_putmem(s, hex, j); 167 return trace_seq_putmem(s, hex, j);
168} 168}
169 169
170void *trace_seq_reserve(struct trace_seq *s, size_t len)
171{
172 void *ret;
173
174 if (len > ((PAGE_SIZE - 1) - s->len))
175 return NULL;
176
177 ret = s->buffer + s->len;
178 s->len += len;
179
180 return ret;
181}
182
170int trace_seq_path(struct trace_seq *s, struct path *path) 183int trace_seq_path(struct trace_seq *s, struct path *path)
171{ 184{
172 unsigned char *p; 185 unsigned char *p;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 35c422fb51a9..e0bde39c2dd9 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -29,24 +29,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
29 unsigned long sym_flags); 29 unsigned long sym_flags);
30extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 30extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
31 size_t cnt); 31 size_t cnt);
32int trace_seq_puts(struct trace_seq *s, const char *str); 32extern int trace_seq_puts(struct trace_seq *s, const char *str);
33int trace_seq_putc(struct trace_seq *s, unsigned char c); 33extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
34int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); 34extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
35int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); 35extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
36int trace_seq_path(struct trace_seq *s, struct path *path); 36 size_t len);
37int seq_print_userip_objs(const struct userstack_entry *entry, 37extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
38 struct trace_seq *s, unsigned long sym_flags); 38extern int trace_seq_path(struct trace_seq *s, struct path *path);
39int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, 39extern int seq_print_userip_objs(const struct userstack_entry *entry,
40 unsigned long ip, unsigned long sym_flags); 40 struct trace_seq *s, unsigned long sym_flags);
41extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
42 unsigned long ip, unsigned long sym_flags);
41 43
42int trace_print_context(struct trace_iterator *iter); 44extern int trace_print_context(struct trace_iterator *iter);
43int trace_print_lat_context(struct trace_iterator *iter); 45extern int trace_print_lat_context(struct trace_iterator *iter);
44 46
45struct trace_event *ftrace_find_event(int type); 47extern struct trace_event *ftrace_find_event(int type);
46int register_ftrace_event(struct trace_event *event); 48extern int register_ftrace_event(struct trace_event *event);
47int unregister_ftrace_event(struct trace_event *event); 49extern int unregister_ftrace_event(struct trace_event *event);
48 50
49enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); 51extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
52 int flags);
50 53
51#define MAX_MEMHEX_BYTES 8 54#define MAX_MEMHEX_BYTES 8
52#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) 55#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 39310e3434ee..acdebd771a93 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -75,7 +75,7 @@ static int stat_seq_init(struct tracer_stat_session *session)
75{ 75{
76 struct trace_stat_list *iter_entry, *new_entry; 76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts; 77 struct tracer_stat *ts = session->ts;
78 void *prev_stat; 78 void *stat;
79 int ret = 0; 79 int ret = 0;
80 int i; 80 int i;
81 81
@@ -85,6 +85,10 @@ static int stat_seq_init(struct tracer_stat_session *session)
85 if (!ts->stat_cmp) 85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp; 86 ts->stat_cmp = dummy_cmp;
87 87
88 stat = ts->stat_start();
89 if (!stat)
90 goto exit;
91
88 /* 92 /*
89 * The first entry. Actually this is the second, but the first 93 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless. 94 * one (the stat_list head) is pointless.
@@ -99,14 +103,19 @@ static int stat_seq_init(struct tracer_stat_session *session)
99 103
100 list_add(&new_entry->list, &session->stat_list); 104 list_add(&new_entry->list, &session->stat_list);
101 105
102 new_entry->stat = ts->stat_start(); 106 new_entry->stat = stat;
103 prev_stat = new_entry->stat;
104 107
105 /* 108 /*
106 * Iterate over the tracer stat entries and store them in a sorted 109 * Iterate over the tracer stat entries and store them in a sorted
107 * list. 110 * list.
108 */ 111 */
109 for (i = 1; ; i++) { 112 for (i = 1; ; i++) {
113 stat = ts->stat_next(stat, i);
114
115 /* End of insertion */
116 if (!stat)
117 break;
118
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); 119 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) { 120 if (!new_entry) {
112 ret = -ENOMEM; 121 ret = -ENOMEM;
@@ -114,31 +123,23 @@ static int stat_seq_init(struct tracer_stat_session *session)
114 } 123 }
115 124
116 INIT_LIST_HEAD(&new_entry->list); 125 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i); 126 new_entry->stat = stat;
118 127
119 /* End of insertion */ 128 list_for_each_entry_reverse(iter_entry, &session->stat_list,
120 if (!new_entry->stat) 129 list) {
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124 130
125 /* Insertion with a descendent sorting */ 131 /* Insertion with a descendent sorting */
126 if (ts->stat_cmp(new_entry->stat, 132 if (ts->stat_cmp(iter_entry->stat,
127 iter_entry->stat) > 0) { 133 new_entry->stat) >= 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132 134
133 /* The current smaller value */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list); 135 list_add(&new_entry->list, &iter_entry->list);
137 break; 136 break;
138 } 137 }
139 } 138 }
140 139
141 prev_stat = new_entry->stat; 140 /* The current larger value */
141 if (list_empty(&new_entry->list))
142 list_add(&new_entry->list, &session->stat_list);
142 } 143 }
143exit: 144exit:
144 mutex_unlock(&session->stat_mutex); 145 mutex_unlock(&session->stat_mutex);
@@ -160,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
160 161
161 /* If we are in the beginning of the file, print the headers */ 162 /* If we are in the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers) 163 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s); 164 return SEQ_START_TOKEN;
164 165
165 return seq_list_start(&session->stat_list, *pos); 166 return seq_list_start(&session->stat_list, *pos);
166} 167}
@@ -169,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{ 170{
170 struct tracer_stat_session *session = s->private; 171 struct tracer_stat_session *session = s->private;
171 172
173 if (p == SEQ_START_TOKEN)
174 return seq_list_start(&session->stat_list, *pos);
175
172 return seq_list_next(p, &session->stat_list, pos); 176 return seq_list_next(p, &session->stat_list, pos);
173} 177}
174 178
@@ -183,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v)
183 struct tracer_stat_session *session = s->private; 187 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); 188 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185 189
190 if (v == SEQ_START_TOKEN)
191 return session->ts->stat_headers(s);
192
186 return session->ts->stat_show(s, l->stat); 193 return session->ts->stat_show(s, l->stat);
187} 194}
188 195
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 9ab035b58cf1..797201e4a137 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
196 struct pid *pid; 196 struct pid *pid;
197 struct task_struct *tsk; 197 struct task_struct *tsk;
198 198
199 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
200 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
201 seq_printf(s, "\n");
202 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
203
199 pid = find_get_pid(cws->pid); 204 pid = find_get_pid(cws->pid);
200 if (pid) { 205 if (pid) {
201 tsk = get_pid_task(pid, PIDTYPE_PID); 206 tsk = get_pid_task(pid, PIDTYPE_PID);
@@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
208 put_pid(pid); 213 put_pid(pid);
209 } 214 }
210 215
211 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
212 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
213 seq_printf(s, "\n");
214 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
215
216 return 0; 216 return 0;
217} 217}
218 218
219static int workqueue_stat_headers(struct seq_file *s) 219static int workqueue_stat_headers(struct seq_file *s)
220{ 220{
221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
222 seq_printf(s, "# | | | |\n\n"); 222 seq_printf(s, "# | | | |\n");
223 return 0; 223 return 0;
224} 224}
225 225
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 68d5dc0310e4..9cf80a11e8b6 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -1,6 +1,6 @@
1/* tracepoint-sample.c 1/* tracepoint-sample.c
2 * 2 *
3 * Executes a tracepoint when /proc/tracepoint-example is opened. 3 * Executes a tracepoint when /proc/tracepoint-sample is opened.
4 * 4 *
5 * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> 5 * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
6 * 6 *
@@ -16,7 +16,7 @@
16DEFINE_TRACE(subsys_event); 16DEFINE_TRACE(subsys_event);
17DEFINE_TRACE(subsys_eventb); 17DEFINE_TRACE(subsys_eventb);
18 18
19struct proc_dir_entry *pentry_example; 19struct proc_dir_entry *pentry_sample;
20 20
21static int my_open(struct inode *inode, struct file *file) 21static int my_open(struct inode *inode, struct file *file)
22{ 22{
@@ -32,25 +32,25 @@ static struct file_operations mark_ops = {
32 .open = my_open, 32 .open = my_open,
33}; 33};
34 34
35static int __init example_init(void) 35static int __init sample_init(void)
36{ 36{
37 printk(KERN_ALERT "example init\n"); 37 printk(KERN_ALERT "sample init\n");
38 pentry_example = proc_create("tracepoint-example", 0444, NULL, 38 pentry_sample = proc_create("tracepoint-sample", 0444, NULL,
39 &mark_ops); 39 &mark_ops);
40 if (!pentry_example) 40 if (!pentry_sample)
41 return -EPERM; 41 return -EPERM;
42 return 0; 42 return 0;
43} 43}
44 44
45static void __exit example_exit(void) 45static void __exit sample_exit(void)
46{ 46{
47 printk(KERN_ALERT "example exit\n"); 47 printk(KERN_ALERT "sample exit\n");
48 remove_proc_entry("tracepoint-example", NULL); 48 remove_proc_entry("tracepoint-sample", NULL);
49} 49}
50 50
51module_init(example_init) 51module_init(sample_init)
52module_exit(example_exit) 52module_exit(sample_exit)
53 53
54MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
55MODULE_AUTHOR("Mathieu Desnoyers"); 55MODULE_AUTHOR("Mathieu Desnoyers");
56MODULE_DESCRIPTION("Tracepoint example"); 56MODULE_DESCRIPTION("Tracepoint sample");