diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/trace/Kconfig | 6 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 473 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 68 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 117 | ||||
-rw-r--r-- | kernel/trace/trace.c | 50 | ||||
-rw-r--r-- | kernel/trace/trace.h | 77 | ||||
-rw-r--r-- | kernel/trace/trace_clock.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 203 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 427 | ||||
-rw-r--r-- | kernel/trace/trace_events_stage_2.h | 45 | ||||
-rw-r--r-- | kernel/trace/trace_events_stage_3.h | 9 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_nop.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 19 | ||||
-rw-r--r-- | kernel/trace/trace_output.h | 33 | ||||
-rw-r--r-- | kernel/trace/trace_stat.c | 47 | ||||
-rw-r--r-- | kernel/trace/trace_workqueue.c | 12 |
18 files changed, 1256 insertions, 341 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b0a46f889659..8a4d72931042 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -63,7 +63,11 @@ config TRACING | |||
63 | # | 63 | # |
64 | config TRACING_SUPPORT | 64 | config TRACING_SUPPORT |
65 | bool | 65 | bool |
66 | depends on TRACE_IRQFLAGS_SUPPORT | 66 | # PPC32 has no irqflags tracing support, but it can use most of the |
67 | # tracers anyway, they were tested to build and work. Note that new | ||
68 | # exceptions to this list aren't welcomed, better implement the | ||
69 | # irqflags tracing for your architecture. | ||
70 | depends on TRACE_IRQFLAGS_SUPPORT || PPC32 | ||
67 | depends on STACKTRACE_SUPPORT | 71 | depends on STACKTRACE_SUPPORT |
68 | default y | 72 | default y |
69 | 73 | ||
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e45c206c2f9..2630f5121ec1 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -45,5 +45,6 @@ obj-$(CONFIG_EVENT_TRACER) += events.o | |||
45 | obj-$(CONFIG_EVENT_TRACER) += trace_export.o | 45 | obj-$(CONFIG_EVENT_TRACER) += trace_export.o |
46 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 46 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
47 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 47 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o |
48 | obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o | ||
48 | 49 | ||
49 | libftrace-y := ftrace.o | 50 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b171778e3863..947c5b3f90c4 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -30,7 +30,7 @@ | |||
30 | static unsigned int blktrace_seq __read_mostly = 1; | 30 | static unsigned int blktrace_seq __read_mostly = 1; |
31 | 31 | ||
32 | static struct trace_array *blk_tr; | 32 | static struct trace_array *blk_tr; |
33 | static int __read_mostly blk_tracer_enabled; | 33 | static bool blk_tracer_enabled __read_mostly; |
34 | 34 | ||
35 | /* Select an alternative, minimalistic output than the original one */ | 35 | /* Select an alternative, minimalistic output than the original one */ |
36 | #define TRACE_BLK_OPT_CLASSIC 0x1 | 36 | #define TRACE_BLK_OPT_CLASSIC 0x1 |
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = { | |||
47 | }; | 47 | }; |
48 | 48 | ||
49 | /* Global reference count of probes */ | 49 | /* Global reference count of probes */ |
50 | static DEFINE_MUTEX(blk_probe_mutex); | ||
51 | static atomic_t blk_probes_ref = ATOMIC_INIT(0); | 50 | static atomic_t blk_probes_ref = ATOMIC_INIT(0); |
52 | 51 | ||
53 | static int blk_register_tracepoints(void); | 52 | static void blk_register_tracepoints(void); |
54 | static void blk_unregister_tracepoints(void); | 53 | static void blk_unregister_tracepoints(void); |
55 | 54 | ||
56 | /* | 55 | /* |
@@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, | |||
60 | const void *data, size_t len) | 59 | const void *data, size_t len) |
61 | { | 60 | { |
62 | struct blk_io_trace *t; | 61 | struct blk_io_trace *t; |
62 | struct ring_buffer_event *event = NULL; | ||
63 | int pc = 0; | ||
64 | int cpu = smp_processor_id(); | ||
65 | bool blk_tracer = blk_tracer_enabled; | ||
66 | |||
67 | if (blk_tracer) { | ||
68 | pc = preempt_count(); | ||
69 | event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, | ||
70 | sizeof(*t) + len, | ||
71 | 0, pc); | ||
72 | if (!event) | ||
73 | return; | ||
74 | t = ring_buffer_event_data(event); | ||
75 | goto record_it; | ||
76 | } | ||
63 | 77 | ||
64 | if (!bt->rchan) | 78 | if (!bt->rchan) |
65 | return; | 79 | return; |
66 | 80 | ||
67 | t = relay_reserve(bt->rchan, sizeof(*t) + len); | 81 | t = relay_reserve(bt->rchan, sizeof(*t) + len); |
68 | if (t) { | 82 | if (t) { |
69 | const int cpu = smp_processor_id(); | ||
70 | |||
71 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; | 83 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; |
72 | t->time = ktime_to_ns(ktime_get()); | 84 | t->time = ktime_to_ns(ktime_get()); |
85 | record_it: | ||
73 | t->device = bt->dev; | 86 | t->device = bt->dev; |
74 | t->action = action; | 87 | t->action = action; |
75 | t->pid = pid; | 88 | t->pid = pid; |
76 | t->cpu = cpu; | 89 | t->cpu = cpu; |
77 | t->pdu_len = len; | 90 | t->pdu_len = len; |
78 | memcpy((void *) t + sizeof(*t), data, len); | 91 | memcpy((void *) t + sizeof(*t), data, len); |
92 | |||
93 | if (blk_tracer) | ||
94 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); | ||
79 | } | 95 | } |
80 | } | 96 | } |
81 | 97 | ||
@@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) | |||
111 | unsigned long flags; | 127 | unsigned long flags; |
112 | char *buf; | 128 | char *buf; |
113 | 129 | ||
114 | if (blk_tr) { | 130 | if (unlikely(bt->trace_state != Blktrace_running && |
115 | va_start(args, fmt); | 131 | !blk_tracer_enabled)) |
116 | ftrace_vprintk(fmt, args); | ||
117 | va_end(args); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | if (!bt->msg_data) | ||
122 | return; | 132 | return; |
123 | 133 | ||
124 | local_irq_save(flags); | 134 | local_irq_save(flags); |
@@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
148 | /* | 158 | /* |
149 | * Data direction bit lookup | 159 | * Data direction bit lookup |
150 | */ | 160 | */ |
151 | static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), | 161 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
152 | BLK_TC_ACT(BLK_TC_WRITE) }; | 162 | BLK_TC_ACT(BLK_TC_WRITE) }; |
153 | 163 | ||
154 | /* The ilog2() calls fall out because they're constant */ | 164 | /* The ilog2() calls fall out because they're constant */ |
155 | #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ | 165 | #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ |
@@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
169 | unsigned long *sequence; | 179 | unsigned long *sequence; |
170 | pid_t pid; | 180 | pid_t pid; |
171 | int cpu, pc = 0; | 181 | int cpu, pc = 0; |
182 | bool blk_tracer = blk_tracer_enabled; | ||
172 | 183 | ||
173 | if (unlikely(bt->trace_state != Blktrace_running || | 184 | if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) |
174 | !blk_tracer_enabled)) | ||
175 | return; | 185 | return; |
176 | 186 | ||
177 | what |= ddir_act[rw & WRITE]; | 187 | what |= ddir_act[rw & WRITE]; |
@@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
186 | return; | 196 | return; |
187 | cpu = raw_smp_processor_id(); | 197 | cpu = raw_smp_processor_id(); |
188 | 198 | ||
189 | if (blk_tr) { | 199 | if (blk_tracer) { |
190 | tracing_record_cmdline(current); | 200 | tracing_record_cmdline(current); |
191 | 201 | ||
192 | pc = preempt_count(); | 202 | pc = preempt_count(); |
@@ -236,7 +246,7 @@ record_it: | |||
236 | if (pdu_len) | 246 | if (pdu_len) |
237 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); | 247 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); |
238 | 248 | ||
239 | if (blk_tr) { | 249 | if (blk_tracer) { |
240 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); | 250 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); |
241 | return; | 251 | return; |
242 | } | 252 | } |
@@ -248,7 +258,7 @@ record_it: | |||
248 | static struct dentry *blk_tree_root; | 258 | static struct dentry *blk_tree_root; |
249 | static DEFINE_MUTEX(blk_tree_mutex); | 259 | static DEFINE_MUTEX(blk_tree_mutex); |
250 | 260 | ||
251 | static void blk_trace_cleanup(struct blk_trace *bt) | 261 | static void blk_trace_free(struct blk_trace *bt) |
252 | { | 262 | { |
253 | debugfs_remove(bt->msg_file); | 263 | debugfs_remove(bt->msg_file); |
254 | debugfs_remove(bt->dropped_file); | 264 | debugfs_remove(bt->dropped_file); |
@@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt) | |||
256 | free_percpu(bt->sequence); | 266 | free_percpu(bt->sequence); |
257 | free_percpu(bt->msg_data); | 267 | free_percpu(bt->msg_data); |
258 | kfree(bt); | 268 | kfree(bt); |
259 | mutex_lock(&blk_probe_mutex); | 269 | } |
270 | |||
271 | static void blk_trace_cleanup(struct blk_trace *bt) | ||
272 | { | ||
273 | blk_trace_free(bt); | ||
260 | if (atomic_dec_and_test(&blk_probes_ref)) | 274 | if (atomic_dec_and_test(&blk_probes_ref)) |
261 | blk_unregister_tracepoints(); | 275 | blk_unregister_tracepoints(); |
262 | mutex_unlock(&blk_probe_mutex); | ||
263 | } | 276 | } |
264 | 277 | ||
265 | int blk_trace_remove(struct request_queue *q) | 278 | int blk_trace_remove(struct request_queue *q) |
@@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q) | |||
270 | if (!bt) | 283 | if (!bt) |
271 | return -EINVAL; | 284 | return -EINVAL; |
272 | 285 | ||
273 | if (bt->trace_state == Blktrace_setup || | 286 | if (bt->trace_state != Blktrace_running) |
274 | bt->trace_state == Blktrace_stopped) | ||
275 | blk_trace_cleanup(bt); | 287 | blk_trace_cleanup(bt); |
276 | 288 | ||
277 | return 0; | 289 | return 0; |
@@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
414 | if (buts->name[i] == '/') | 426 | if (buts->name[i] == '/') |
415 | buts->name[i] = '_'; | 427 | buts->name[i] = '_'; |
416 | 428 | ||
417 | ret = -ENOMEM; | ||
418 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); | 429 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); |
419 | if (!bt) | 430 | if (!bt) |
420 | goto err; | 431 | return -ENOMEM; |
421 | 432 | ||
433 | ret = -ENOMEM; | ||
422 | bt->sequence = alloc_percpu(unsigned long); | 434 | bt->sequence = alloc_percpu(unsigned long); |
423 | if (!bt->sequence) | 435 | if (!bt->sequence) |
424 | goto err; | 436 | goto err; |
@@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
429 | 441 | ||
430 | ret = -ENOENT; | 442 | ret = -ENOENT; |
431 | 443 | ||
444 | mutex_lock(&blk_tree_mutex); | ||
432 | if (!blk_tree_root) { | 445 | if (!blk_tree_root) { |
433 | blk_tree_root = debugfs_create_dir("block", NULL); | 446 | blk_tree_root = debugfs_create_dir("block", NULL); |
434 | if (!blk_tree_root) | 447 | if (!blk_tree_root) { |
435 | return -ENOMEM; | 448 | mutex_unlock(&blk_tree_mutex); |
449 | goto err; | ||
450 | } | ||
436 | } | 451 | } |
452 | mutex_unlock(&blk_tree_mutex); | ||
437 | 453 | ||
438 | dir = debugfs_create_dir(buts->name, blk_tree_root); | 454 | dir = debugfs_create_dir(buts->name, blk_tree_root); |
439 | 455 | ||
@@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
471 | bt->pid = buts->pid; | 487 | bt->pid = buts->pid; |
472 | bt->trace_state = Blktrace_setup; | 488 | bt->trace_state = Blktrace_setup; |
473 | 489 | ||
474 | mutex_lock(&blk_probe_mutex); | ||
475 | if (atomic_add_return(1, &blk_probes_ref) == 1) { | ||
476 | ret = blk_register_tracepoints(); | ||
477 | if (ret) | ||
478 | goto probe_err; | ||
479 | } | ||
480 | mutex_unlock(&blk_probe_mutex); | ||
481 | |||
482 | ret = -EBUSY; | 490 | ret = -EBUSY; |
483 | old_bt = xchg(&q->blk_trace, bt); | 491 | old_bt = xchg(&q->blk_trace, bt); |
484 | if (old_bt) { | 492 | if (old_bt) { |
@@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
486 | goto err; | 494 | goto err; |
487 | } | 495 | } |
488 | 496 | ||
497 | if (atomic_inc_return(&blk_probes_ref) == 1) | ||
498 | blk_register_tracepoints(); | ||
499 | |||
489 | return 0; | 500 | return 0; |
490 | probe_err: | ||
491 | atomic_dec(&blk_probes_ref); | ||
492 | mutex_unlock(&blk_probe_mutex); | ||
493 | err: | 501 | err: |
494 | if (bt) { | 502 | blk_trace_free(bt); |
495 | if (bt->msg_file) | ||
496 | debugfs_remove(bt->msg_file); | ||
497 | if (bt->dropped_file) | ||
498 | debugfs_remove(bt->dropped_file); | ||
499 | free_percpu(bt->sequence); | ||
500 | free_percpu(bt->msg_data); | ||
501 | if (bt->rchan) | ||
502 | relay_close(bt->rchan); | ||
503 | kfree(bt); | ||
504 | } | ||
505 | return ret; | 503 | return ret; |
506 | } | 504 | } |
507 | 505 | ||
@@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q, | |||
863 | } | 861 | } |
864 | EXPORT_SYMBOL_GPL(blk_add_driver_data); | 862 | EXPORT_SYMBOL_GPL(blk_add_driver_data); |
865 | 863 | ||
866 | static int blk_register_tracepoints(void) | 864 | static void blk_register_tracepoints(void) |
867 | { | 865 | { |
868 | int ret; | 866 | int ret; |
869 | 867 | ||
@@ -901,7 +899,6 @@ static int blk_register_tracepoints(void) | |||
901 | WARN_ON(ret); | 899 | WARN_ON(ret); |
902 | ret = register_trace_block_remap(blk_add_trace_remap); | 900 | ret = register_trace_block_remap(blk_add_trace_remap); |
903 | WARN_ON(ret); | 901 | WARN_ON(ret); |
904 | return 0; | ||
905 | } | 902 | } |
906 | 903 | ||
907 | static void blk_unregister_tracepoints(void) | 904 | static void blk_unregister_tracepoints(void) |
@@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void) | |||
934 | static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | 931 | static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) |
935 | { | 932 | { |
936 | int i = 0; | 933 | int i = 0; |
934 | int tc = t->action >> BLK_TC_SHIFT; | ||
935 | |||
936 | if (t->action == BLK_TN_MESSAGE) { | ||
937 | rwbs[i++] = 'N'; | ||
938 | goto out; | ||
939 | } | ||
937 | 940 | ||
938 | if (t->action & BLK_TC_DISCARD) | 941 | if (tc & BLK_TC_DISCARD) |
939 | rwbs[i++] = 'D'; | 942 | rwbs[i++] = 'D'; |
940 | else if (t->action & BLK_TC_WRITE) | 943 | else if (tc & BLK_TC_WRITE) |
941 | rwbs[i++] = 'W'; | 944 | rwbs[i++] = 'W'; |
942 | else if (t->bytes) | 945 | else if (t->bytes) |
943 | rwbs[i++] = 'R'; | 946 | rwbs[i++] = 'R'; |
944 | else | 947 | else |
945 | rwbs[i++] = 'N'; | 948 | rwbs[i++] = 'N'; |
946 | 949 | ||
947 | if (t->action & BLK_TC_AHEAD) | 950 | if (tc & BLK_TC_AHEAD) |
948 | rwbs[i++] = 'A'; | 951 | rwbs[i++] = 'A'; |
949 | if (t->action & BLK_TC_BARRIER) | 952 | if (tc & BLK_TC_BARRIER) |
950 | rwbs[i++] = 'B'; | 953 | rwbs[i++] = 'B'; |
951 | if (t->action & BLK_TC_SYNC) | 954 | if (tc & BLK_TC_SYNC) |
952 | rwbs[i++] = 'S'; | 955 | rwbs[i++] = 'S'; |
953 | if (t->action & BLK_TC_META) | 956 | if (tc & BLK_TC_META) |
954 | rwbs[i++] = 'M'; | 957 | rwbs[i++] = 'M'; |
955 | 958 | out: | |
956 | rwbs[i] = '\0'; | 959 | rwbs[i] = '\0'; |
957 | } | 960 | } |
958 | 961 | ||
@@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent) | |||
979 | 982 | ||
980 | static inline __u16 t_error(const struct trace_entry *ent) | 983 | static inline __u16 t_error(const struct trace_entry *ent) |
981 | { | 984 | { |
982 | return te_blk_io_trace(ent)->sector; | 985 | return te_blk_io_trace(ent)->error; |
983 | } | 986 | } |
984 | 987 | ||
985 | static __u64 get_pdu_int(const struct trace_entry *ent) | 988 | static __u64 get_pdu_int(const struct trace_entry *ent) |
@@ -999,29 +1002,31 @@ static void get_pdu_remap(const struct trace_entry *ent, | |||
999 | r->sector = be64_to_cpu(sector); | 1002 | r->sector = be64_to_cpu(sector); |
1000 | } | 1003 | } |
1001 | 1004 | ||
1002 | static int blk_log_action_iter(struct trace_iterator *iter, const char *act) | 1005 | typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); |
1006 | |||
1007 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | ||
1003 | { | 1008 | { |
1004 | char rwbs[6]; | 1009 | char rwbs[6]; |
1005 | unsigned long long ts = ns2usecs(iter->ts); | 1010 | unsigned long long ts = iter->ts; |
1006 | unsigned long usec_rem = do_div(ts, USEC_PER_SEC); | 1011 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); |
1007 | unsigned secs = (unsigned long)ts; | 1012 | unsigned secs = (unsigned long)ts; |
1008 | const struct trace_entry *ent = iter->ent; | 1013 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
1009 | const struct blk_io_trace *t = (const struct blk_io_trace *)ent; | ||
1010 | 1014 | ||
1011 | fill_rwbs(rwbs, t); | 1015 | fill_rwbs(rwbs, t); |
1012 | 1016 | ||
1013 | return trace_seq_printf(&iter->seq, | 1017 | return trace_seq_printf(&iter->seq, |
1014 | "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", | 1018 | "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", |
1015 | MAJOR(t->device), MINOR(t->device), iter->cpu, | 1019 | MAJOR(t->device), MINOR(t->device), iter->cpu, |
1016 | secs, usec_rem, ent->pid, act, rwbs); | 1020 | secs, nsec_rem, iter->ent->pid, act, rwbs); |
1017 | } | 1021 | } |
1018 | 1022 | ||
1019 | static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, | 1023 | static int blk_log_action(struct trace_iterator *iter, const char *act) |
1020 | const char *act) | ||
1021 | { | 1024 | { |
1022 | char rwbs[6]; | 1025 | char rwbs[6]; |
1026 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | ||
1027 | |||
1023 | fill_rwbs(rwbs, t); | 1028 | fill_rwbs(rwbs, t); |
1024 | return trace_seq_printf(s, "%3d,%-3d %2s %3s ", | 1029 | return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", |
1025 | MAJOR(t->device), MINOR(t->device), act, rwbs); | 1030 | MAJOR(t->device), MINOR(t->device), act, rwbs); |
1026 | } | 1031 | } |
1027 | 1032 | ||
@@ -1085,6 +1090,17 @@ static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) | |||
1085 | get_pdu_int(ent), cmd); | 1090 | get_pdu_int(ent), cmd); |
1086 | } | 1091 | } |
1087 | 1092 | ||
1093 | static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) | ||
1094 | { | ||
1095 | int ret; | ||
1096 | const struct blk_io_trace *t = te_blk_io_trace(ent); | ||
1097 | |||
1098 | ret = trace_seq_putmem(s, t + 1, t->pdu_len); | ||
1099 | if (ret) | ||
1100 | return trace_seq_putc(s, '\n'); | ||
1101 | return ret; | ||
1102 | } | ||
1103 | |||
1088 | /* | 1104 | /* |
1089 | * struct tracer operations | 1105 | * struct tracer operations |
1090 | */ | 1106 | */ |
@@ -1099,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m) | |||
1099 | 1115 | ||
1100 | static void blk_tracer_start(struct trace_array *tr) | 1116 | static void blk_tracer_start(struct trace_array *tr) |
1101 | { | 1117 | { |
1102 | mutex_lock(&blk_probe_mutex); | 1118 | blk_tracer_enabled = true; |
1103 | if (atomic_add_return(1, &blk_probes_ref) == 1) | ||
1104 | if (blk_register_tracepoints()) | ||
1105 | atomic_dec(&blk_probes_ref); | ||
1106 | mutex_unlock(&blk_probe_mutex); | ||
1107 | trace_flags &= ~TRACE_ITER_CONTEXT_INFO; | 1119 | trace_flags &= ~TRACE_ITER_CONTEXT_INFO; |
1108 | } | 1120 | } |
1109 | 1121 | ||
@@ -1111,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr) | |||
1111 | { | 1123 | { |
1112 | blk_tr = tr; | 1124 | blk_tr = tr; |
1113 | blk_tracer_start(tr); | 1125 | blk_tracer_start(tr); |
1114 | mutex_lock(&blk_probe_mutex); | ||
1115 | blk_tracer_enabled++; | ||
1116 | mutex_unlock(&blk_probe_mutex); | ||
1117 | return 0; | 1126 | return 0; |
1118 | } | 1127 | } |
1119 | 1128 | ||
1120 | static void blk_tracer_stop(struct trace_array *tr) | 1129 | static void blk_tracer_stop(struct trace_array *tr) |
1121 | { | 1130 | { |
1131 | blk_tracer_enabled = false; | ||
1122 | trace_flags |= TRACE_ITER_CONTEXT_INFO; | 1132 | trace_flags |= TRACE_ITER_CONTEXT_INFO; |
1123 | mutex_lock(&blk_probe_mutex); | ||
1124 | if (atomic_dec_and_test(&blk_probes_ref)) | ||
1125 | blk_unregister_tracepoints(); | ||
1126 | mutex_unlock(&blk_probe_mutex); | ||
1127 | } | 1133 | } |
1128 | 1134 | ||
1129 | static void blk_tracer_reset(struct trace_array *tr) | 1135 | static void blk_tracer_reset(struct trace_array *tr) |
1130 | { | 1136 | { |
1131 | if (!atomic_read(&blk_probes_ref)) | ||
1132 | return; | ||
1133 | |||
1134 | mutex_lock(&blk_probe_mutex); | ||
1135 | blk_tracer_enabled--; | ||
1136 | WARN_ON(blk_tracer_enabled < 0); | ||
1137 | mutex_unlock(&blk_probe_mutex); | ||
1138 | |||
1139 | blk_tracer_stop(tr); | 1137 | blk_tracer_stop(tr); |
1140 | } | 1138 | } |
1141 | 1139 | ||
1142 | static struct { | 1140 | static const struct { |
1143 | const char *act[2]; | 1141 | const char *act[2]; |
1144 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); | 1142 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); |
1145 | } what2act[] __read_mostly = { | 1143 | } what2act[] = { |
1146 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, | 1144 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, |
1147 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, | 1145 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, |
1148 | [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, | 1146 | [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, |
@@ -1160,29 +1158,48 @@ static struct { | |||
1160 | [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, | 1158 | [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, |
1161 | }; | 1159 | }; |
1162 | 1160 | ||
1163 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | 1161 | static enum print_line_t print_one_line(struct trace_iterator *iter, |
1164 | int flags) | 1162 | bool classic) |
1165 | { | 1163 | { |
1166 | struct trace_seq *s = &iter->seq; | 1164 | struct trace_seq *s = &iter->seq; |
1167 | const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; | 1165 | const struct blk_io_trace *t; |
1168 | const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); | 1166 | u16 what; |
1169 | int ret; | 1167 | int ret; |
1168 | bool long_act; | ||
1169 | blk_log_action_t *log_action; | ||
1170 | 1170 | ||
1171 | if (!trace_print_context(iter)) | 1171 | t = te_blk_io_trace(iter->ent); |
1172 | return TRACE_TYPE_PARTIAL_LINE; | 1172 | what = t->action & ((1 << BLK_TC_SHIFT) - 1); |
1173 | long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | ||
1174 | log_action = classic ? &blk_log_action_classic : &blk_log_action; | ||
1173 | 1175 | ||
1174 | if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) | 1176 | if (t->action == BLK_TN_MESSAGE) { |
1177 | ret = log_action(iter, long_act ? "message" : "m"); | ||
1178 | if (ret) | ||
1179 | ret = blk_log_msg(s, iter->ent); | ||
1180 | goto out; | ||
1181 | } | ||
1182 | |||
1183 | if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) | ||
1175 | ret = trace_seq_printf(s, "Bad pc action %x\n", what); | 1184 | ret = trace_seq_printf(s, "Bad pc action %x\n", what); |
1176 | else { | 1185 | else { |
1177 | const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | 1186 | ret = log_action(iter, what2act[what].act[long_act]); |
1178 | ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); | ||
1179 | if (ret) | 1187 | if (ret) |
1180 | ret = what2act[what].print(s, iter->ent); | 1188 | ret = what2act[what].print(s, iter->ent); |
1181 | } | 1189 | } |
1182 | 1190 | out: | |
1183 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1191 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
1184 | } | 1192 | } |
1185 | 1193 | ||
1194 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | ||
1195 | int flags) | ||
1196 | { | ||
1197 | if (!trace_print_context(iter)) | ||
1198 | return TRACE_TYPE_PARTIAL_LINE; | ||
1199 | |||
1200 | return print_one_line(iter, false); | ||
1201 | } | ||
1202 | |||
1186 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | 1203 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) |
1187 | { | 1204 | { |
1188 | struct trace_seq *s = &iter->seq; | 1205 | struct trace_seq *s = &iter->seq; |
@@ -1190,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | |||
1190 | const int offset = offsetof(struct blk_io_trace, sector); | 1207 | const int offset = offsetof(struct blk_io_trace, sector); |
1191 | struct blk_io_trace old = { | 1208 | struct blk_io_trace old = { |
1192 | .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, | 1209 | .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, |
1193 | .time = ns2usecs(iter->ts), | 1210 | .time = iter->ts, |
1194 | }; | 1211 | }; |
1195 | 1212 | ||
1196 | if (!trace_seq_putmem(s, &old, offset)) | 1213 | if (!trace_seq_putmem(s, &old, offset)) |
@@ -1208,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags) | |||
1208 | 1225 | ||
1209 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) | 1226 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) |
1210 | { | 1227 | { |
1211 | const struct blk_io_trace *t; | ||
1212 | u16 what; | ||
1213 | int ret; | ||
1214 | |||
1215 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) | 1228 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
1216 | return TRACE_TYPE_UNHANDLED; | 1229 | return TRACE_TYPE_UNHANDLED; |
1217 | 1230 | ||
1218 | t = (const struct blk_io_trace *)iter->ent; | 1231 | return print_one_line(iter, true); |
1219 | what = t->action & ((1 << BLK_TC_SHIFT) - 1); | ||
1220 | |||
1221 | if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) | ||
1222 | ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); | ||
1223 | else { | ||
1224 | const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | ||
1225 | ret = blk_log_action_iter(iter, what2act[what].act[long_act]); | ||
1226 | if (ret) | ||
1227 | ret = what2act[what].print(&iter->seq, iter->ent); | ||
1228 | } | ||
1229 | |||
1230 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | ||
1231 | } | 1232 | } |
1232 | 1233 | ||
1233 | static struct tracer blk_tracer __read_mostly = { | 1234 | static struct tracer blk_tracer __read_mostly = { |
@@ -1273,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q) | |||
1273 | if (bt == NULL) | 1274 | if (bt == NULL) |
1274 | return -EINVAL; | 1275 | return -EINVAL; |
1275 | 1276 | ||
1276 | kfree(bt); | 1277 | if (atomic_dec_and_test(&blk_probes_ref)) |
1278 | blk_unregister_tracepoints(); | ||
1279 | |||
1280 | blk_trace_free(bt); | ||
1277 | return 0; | 1281 | return 0; |
1278 | } | 1282 | } |
1279 | 1283 | ||
@@ -1283,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q) | |||
1283 | static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) | 1287 | static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) |
1284 | { | 1288 | { |
1285 | struct blk_trace *old_bt, *bt = NULL; | 1289 | struct blk_trace *old_bt, *bt = NULL; |
1286 | int ret; | 1290 | int ret = -ENOMEM; |
1287 | 1291 | ||
1288 | ret = -ENOMEM; | ||
1289 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); | 1292 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); |
1290 | if (!bt) | 1293 | if (!bt) |
1291 | goto err; | 1294 | return -ENOMEM; |
1295 | |||
1296 | bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); | ||
1297 | if (!bt->msg_data) | ||
1298 | goto free_bt; | ||
1292 | 1299 | ||
1293 | bt->dev = dev; | 1300 | bt->dev = dev; |
1294 | bt->act_mask = (u16)-1; | 1301 | bt->act_mask = (u16)-1; |
1295 | bt->end_lba = -1ULL; | 1302 | bt->end_lba = -1ULL; |
1296 | bt->trace_state = Blktrace_running; | ||
1297 | 1303 | ||
1298 | old_bt = xchg(&q->blk_trace, bt); | 1304 | old_bt = xchg(&q->blk_trace, bt); |
1299 | if (old_bt != NULL) { | 1305 | if (old_bt != NULL) { |
1300 | (void)xchg(&q->blk_trace, old_bt); | 1306 | (void)xchg(&q->blk_trace, old_bt); |
1301 | kfree(bt); | ||
1302 | ret = -EBUSY; | 1307 | ret = -EBUSY; |
1308 | goto free_bt; | ||
1303 | } | 1309 | } |
1310 | |||
1311 | if (atomic_inc_return(&blk_probes_ref) == 1) | ||
1312 | blk_register_tracepoints(); | ||
1304 | return 0; | 1313 | return 0; |
1305 | err: | 1314 | |
1315 | free_bt: | ||
1316 | blk_trace_free(bt); | ||
1306 | return ret; | 1317 | return ret; |
1307 | } | 1318 | } |
1308 | 1319 | ||
@@ -1310,72 +1321,6 @@ err: | |||
1310 | * sysfs interface to enable and configure tracing | 1321 | * sysfs interface to enable and configure tracing |
1311 | */ | 1322 | */ |
1312 | 1323 | ||
1313 | static ssize_t sysfs_blk_trace_enable_show(struct device *dev, | ||
1314 | struct device_attribute *attr, | ||
1315 | char *buf) | ||
1316 | { | ||
1317 | struct hd_struct *p = dev_to_part(dev); | ||
1318 | struct block_device *bdev; | ||
1319 | ssize_t ret = -ENXIO; | ||
1320 | |||
1321 | lock_kernel(); | ||
1322 | bdev = bdget(part_devt(p)); | ||
1323 | if (bdev != NULL) { | ||
1324 | struct request_queue *q = bdev_get_queue(bdev); | ||
1325 | |||
1326 | if (q != NULL) { | ||
1327 | mutex_lock(&bdev->bd_mutex); | ||
1328 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | ||
1329 | mutex_unlock(&bdev->bd_mutex); | ||
1330 | } | ||
1331 | |||
1332 | bdput(bdev); | ||
1333 | } | ||
1334 | |||
1335 | unlock_kernel(); | ||
1336 | return ret; | ||
1337 | } | ||
1338 | |||
1339 | static ssize_t sysfs_blk_trace_enable_store(struct device *dev, | ||
1340 | struct device_attribute *attr, | ||
1341 | const char *buf, size_t count) | ||
1342 | { | ||
1343 | struct block_device *bdev; | ||
1344 | struct request_queue *q; | ||
1345 | struct hd_struct *p; | ||
1346 | int value; | ||
1347 | ssize_t ret = -ENXIO; | ||
1348 | |||
1349 | if (count == 0 || sscanf(buf, "%d", &value) != 1) | ||
1350 | goto out; | ||
1351 | |||
1352 | lock_kernel(); | ||
1353 | p = dev_to_part(dev); | ||
1354 | bdev = bdget(part_devt(p)); | ||
1355 | if (bdev == NULL) | ||
1356 | goto out_unlock_kernel; | ||
1357 | |||
1358 | q = bdev_get_queue(bdev); | ||
1359 | if (q == NULL) | ||
1360 | goto out_bdput; | ||
1361 | |||
1362 | mutex_lock(&bdev->bd_mutex); | ||
1363 | if (value) | ||
1364 | ret = blk_trace_setup_queue(q, bdev->bd_dev); | ||
1365 | else | ||
1366 | ret = blk_trace_remove_queue(q); | ||
1367 | mutex_unlock(&bdev->bd_mutex); | ||
1368 | |||
1369 | if (ret == 0) | ||
1370 | ret = count; | ||
1371 | out_bdput: | ||
1372 | bdput(bdev); | ||
1373 | out_unlock_kernel: | ||
1374 | unlock_kernel(); | ||
1375 | out: | ||
1376 | return ret; | ||
1377 | } | ||
1378 | |||
1379 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | 1324 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, |
1380 | struct device_attribute *attr, | 1325 | struct device_attribute *attr, |
1381 | char *buf); | 1326 | char *buf); |
@@ -1387,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1387 | sysfs_blk_trace_attr_show, \ | 1332 | sysfs_blk_trace_attr_show, \ |
1388 | sysfs_blk_trace_attr_store) | 1333 | sysfs_blk_trace_attr_store) |
1389 | 1334 | ||
1390 | static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, | 1335 | static BLK_TRACE_DEVICE_ATTR(enable); |
1391 | sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store); | ||
1392 | static BLK_TRACE_DEVICE_ATTR(act_mask); | 1336 | static BLK_TRACE_DEVICE_ATTR(act_mask); |
1393 | static BLK_TRACE_DEVICE_ATTR(pid); | 1337 | static BLK_TRACE_DEVICE_ATTR(pid); |
1394 | static BLK_TRACE_DEVICE_ATTR(start_lba); | 1338 | static BLK_TRACE_DEVICE_ATTR(start_lba); |
@@ -1408,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = { | |||
1408 | .attrs = blk_trace_attrs, | 1352 | .attrs = blk_trace_attrs, |
1409 | }; | 1353 | }; |
1410 | 1354 | ||
1411 | static int blk_str2act_mask(const char *str) | 1355 | static const struct { |
1356 | int mask; | ||
1357 | const char *str; | ||
1358 | } mask_maps[] = { | ||
1359 | { BLK_TC_READ, "read" }, | ||
1360 | { BLK_TC_WRITE, "write" }, | ||
1361 | { BLK_TC_BARRIER, "barrier" }, | ||
1362 | { BLK_TC_SYNC, "sync" }, | ||
1363 | { BLK_TC_QUEUE, "queue" }, | ||
1364 | { BLK_TC_REQUEUE, "requeue" }, | ||
1365 | { BLK_TC_ISSUE, "issue" }, | ||
1366 | { BLK_TC_COMPLETE, "complete" }, | ||
1367 | { BLK_TC_FS, "fs" }, | ||
1368 | { BLK_TC_PC, "pc" }, | ||
1369 | { BLK_TC_AHEAD, "ahead" }, | ||
1370 | { BLK_TC_META, "meta" }, | ||
1371 | { BLK_TC_DISCARD, "discard" }, | ||
1372 | { BLK_TC_DRV_DATA, "drv_data" }, | ||
1373 | }; | ||
1374 | |||
1375 | static int blk_trace_str2mask(const char *str) | ||
1412 | { | 1376 | { |
1377 | int i; | ||
1413 | int mask = 0; | 1378 | int mask = 0; |
1414 | char *copy = kstrdup(str, GFP_KERNEL), *s; | 1379 | char *s, *token; |
1415 | 1380 | ||
1416 | if (copy == NULL) | 1381 | s = kstrdup(str, GFP_KERNEL); |
1382 | if (s == NULL) | ||
1417 | return -ENOMEM; | 1383 | return -ENOMEM; |
1418 | 1384 | s = strstrip(s); | |
1419 | s = strstrip(copy); | ||
1420 | 1385 | ||
1421 | while (1) { | 1386 | while (1) { |
1422 | char *sep = strchr(s, ','); | 1387 | token = strsep(&s, ","); |
1423 | 1388 | if (token == NULL) | |
1424 | if (sep != NULL) | ||
1425 | *sep = '\0'; | ||
1426 | |||
1427 | if (strcasecmp(s, "barrier") == 0) | ||
1428 | mask |= BLK_TC_BARRIER; | ||
1429 | else if (strcasecmp(s, "complete") == 0) | ||
1430 | mask |= BLK_TC_COMPLETE; | ||
1431 | else if (strcasecmp(s, "fs") == 0) | ||
1432 | mask |= BLK_TC_FS; | ||
1433 | else if (strcasecmp(s, "issue") == 0) | ||
1434 | mask |= BLK_TC_ISSUE; | ||
1435 | else if (strcasecmp(s, "pc") == 0) | ||
1436 | mask |= BLK_TC_PC; | ||
1437 | else if (strcasecmp(s, "queue") == 0) | ||
1438 | mask |= BLK_TC_QUEUE; | ||
1439 | else if (strcasecmp(s, "read") == 0) | ||
1440 | mask |= BLK_TC_READ; | ||
1441 | else if (strcasecmp(s, "requeue") == 0) | ||
1442 | mask |= BLK_TC_REQUEUE; | ||
1443 | else if (strcasecmp(s, "sync") == 0) | ||
1444 | mask |= BLK_TC_SYNC; | ||
1445 | else if (strcasecmp(s, "write") == 0) | ||
1446 | mask |= BLK_TC_WRITE; | ||
1447 | |||
1448 | if (sep == NULL) | ||
1449 | break; | 1389 | break; |
1450 | 1390 | ||
1451 | s = sep + 1; | 1391 | if (*token == '\0') |
1392 | continue; | ||
1393 | |||
1394 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { | ||
1395 | if (strcasecmp(token, mask_maps[i].str) == 0) { | ||
1396 | mask |= mask_maps[i].mask; | ||
1397 | break; | ||
1398 | } | ||
1399 | } | ||
1400 | if (i == ARRAY_SIZE(mask_maps)) { | ||
1401 | mask = -EINVAL; | ||
1402 | break; | ||
1403 | } | ||
1452 | } | 1404 | } |
1453 | kfree(copy); | 1405 | kfree(s); |
1454 | 1406 | ||
1455 | return mask; | 1407 | return mask; |
1456 | } | 1408 | } |
1457 | 1409 | ||
1410 | static ssize_t blk_trace_mask2str(char *buf, int mask) | ||
1411 | { | ||
1412 | int i; | ||
1413 | char *p = buf; | ||
1414 | |||
1415 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { | ||
1416 | if (mask & mask_maps[i].mask) { | ||
1417 | p += sprintf(p, "%s%s", | ||
1418 | (p == buf) ? "" : ",", mask_maps[i].str); | ||
1419 | } | ||
1420 | } | ||
1421 | *p++ = '\n'; | ||
1422 | |||
1423 | return p - buf; | ||
1424 | } | ||
1425 | |||
1426 | static struct request_queue *blk_trace_get_queue(struct block_device *bdev) | ||
1427 | { | ||
1428 | if (bdev->bd_disk == NULL) | ||
1429 | return NULL; | ||
1430 | |||
1431 | return bdev_get_queue(bdev); | ||
1432 | } | ||
1433 | |||
1458 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | 1434 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, |
1459 | struct device_attribute *attr, | 1435 | struct device_attribute *attr, |
1460 | char *buf) | 1436 | char *buf) |
@@ -1469,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
1469 | if (bdev == NULL) | 1445 | if (bdev == NULL) |
1470 | goto out_unlock_kernel; | 1446 | goto out_unlock_kernel; |
1471 | 1447 | ||
1472 | q = bdev_get_queue(bdev); | 1448 | q = blk_trace_get_queue(bdev); |
1473 | if (q == NULL) | 1449 | if (q == NULL) |
1474 | goto out_bdput; | 1450 | goto out_bdput; |
1451 | |||
1475 | mutex_lock(&bdev->bd_mutex); | 1452 | mutex_lock(&bdev->bd_mutex); |
1453 | |||
1454 | if (attr == &dev_attr_enable) { | ||
1455 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | ||
1456 | goto out_unlock_bdev; | ||
1457 | } | ||
1458 | |||
1476 | if (q->blk_trace == NULL) | 1459 | if (q->blk_trace == NULL) |
1477 | ret = sprintf(buf, "disabled\n"); | 1460 | ret = sprintf(buf, "disabled\n"); |
1478 | else if (attr == &dev_attr_act_mask) | 1461 | else if (attr == &dev_attr_act_mask) |
1479 | ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); | 1462 | ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); |
1480 | else if (attr == &dev_attr_pid) | 1463 | else if (attr == &dev_attr_pid) |
1481 | ret = sprintf(buf, "%u\n", q->blk_trace->pid); | 1464 | ret = sprintf(buf, "%u\n", q->blk_trace->pid); |
1482 | else if (attr == &dev_attr_start_lba) | 1465 | else if (attr == &dev_attr_start_lba) |
1483 | ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); | 1466 | ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); |
1484 | else if (attr == &dev_attr_end_lba) | 1467 | else if (attr == &dev_attr_end_lba) |
1485 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); | 1468 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); |
1469 | |||
1470 | out_unlock_bdev: | ||
1486 | mutex_unlock(&bdev->bd_mutex); | 1471 | mutex_unlock(&bdev->bd_mutex); |
1487 | out_bdput: | 1472 | out_bdput: |
1488 | bdput(bdev); | 1473 | bdput(bdev); |
@@ -1499,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1499 | struct request_queue *q; | 1484 | struct request_queue *q; |
1500 | struct hd_struct *p; | 1485 | struct hd_struct *p; |
1501 | u64 value; | 1486 | u64 value; |
1502 | ssize_t ret = -ENXIO; | 1487 | ssize_t ret = -EINVAL; |
1503 | 1488 | ||
1504 | if (count == 0) | 1489 | if (count == 0) |
1505 | goto out; | 1490 | goto out; |
@@ -1507,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1507 | if (attr == &dev_attr_act_mask) { | 1492 | if (attr == &dev_attr_act_mask) { |
1508 | if (sscanf(buf, "%llx", &value) != 1) { | 1493 | if (sscanf(buf, "%llx", &value) != 1) { |
1509 | /* Assume it is a list of trace category names */ | 1494 | /* Assume it is a list of trace category names */ |
1510 | value = blk_str2act_mask(buf); | 1495 | ret = blk_trace_str2mask(buf); |
1511 | if (value < 0) | 1496 | if (ret < 0) |
1512 | goto out; | 1497 | goto out; |
1498 | value = ret; | ||
1513 | } | 1499 | } |
1514 | } else if (sscanf(buf, "%llu", &value) != 1) | 1500 | } else if (sscanf(buf, "%llu", &value) != 1) |
1515 | goto out; | 1501 | goto out; |
1516 | 1502 | ||
1503 | ret = -ENXIO; | ||
1504 | |||
1517 | lock_kernel(); | 1505 | lock_kernel(); |
1518 | p = dev_to_part(dev); | 1506 | p = dev_to_part(dev); |
1519 | bdev = bdget(part_devt(p)); | 1507 | bdev = bdget(part_devt(p)); |
1520 | if (bdev == NULL) | 1508 | if (bdev == NULL) |
1521 | goto out_unlock_kernel; | 1509 | goto out_unlock_kernel; |
1522 | 1510 | ||
1523 | q = bdev_get_queue(bdev); | 1511 | q = blk_trace_get_queue(bdev); |
1524 | if (q == NULL) | 1512 | if (q == NULL) |
1525 | goto out_bdput; | 1513 | goto out_bdput; |
1526 | 1514 | ||
1527 | mutex_lock(&bdev->bd_mutex); | 1515 | mutex_lock(&bdev->bd_mutex); |
1516 | |||
1517 | if (attr == &dev_attr_enable) { | ||
1518 | if (value) | ||
1519 | ret = blk_trace_setup_queue(q, bdev->bd_dev); | ||
1520 | else | ||
1521 | ret = blk_trace_remove_queue(q); | ||
1522 | goto out_unlock_bdev; | ||
1523 | } | ||
1524 | |||
1528 | ret = 0; | 1525 | ret = 0; |
1529 | if (q->blk_trace == NULL) | 1526 | if (q->blk_trace == NULL) |
1530 | ret = blk_trace_setup_queue(q, bdev->bd_dev); | 1527 | ret = blk_trace_setup_queue(q, bdev->bd_dev); |
@@ -1538,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1538 | q->blk_trace->start_lba = value; | 1535 | q->blk_trace->start_lba = value; |
1539 | else if (attr == &dev_attr_end_lba) | 1536 | else if (attr == &dev_attr_end_lba) |
1540 | q->blk_trace->end_lba = value; | 1537 | q->blk_trace->end_lba = value; |
1541 | ret = count; | ||
1542 | } | 1538 | } |
1539 | |||
1540 | out_unlock_bdev: | ||
1543 | mutex_unlock(&bdev->bd_mutex); | 1541 | mutex_unlock(&bdev->bd_mutex); |
1544 | out_bdput: | 1542 | out_bdput: |
1545 | bdput(bdev); | 1543 | bdput(bdev); |
1546 | out_unlock_kernel: | 1544 | out_unlock_kernel: |
1547 | unlock_kernel(); | 1545 | unlock_kernel(); |
1548 | out: | 1546 | out: |
1549 | return ret; | 1547 | return ret ? ret : count; |
1550 | } | 1548 | } |
1549 | |||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7847806eefef..1752a63f37c0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #include <linux/list.h> | 29 | #include <linux/list.h> |
30 | #include <linux/hash.h> | 30 | #include <linux/hash.h> |
31 | 31 | ||
32 | #include <trace/sched.h> | ||
33 | |||
32 | #include <asm/ftrace.h> | 34 | #include <asm/ftrace.h> |
33 | 35 | ||
34 | #include "trace.h" | 36 | #include "trace.h" |
@@ -339,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec) | |||
339 | 341 | ||
340 | static void ftrace_free_rec(struct dyn_ftrace *rec) | 342 | static void ftrace_free_rec(struct dyn_ftrace *rec) |
341 | { | 343 | { |
342 | rec->ip = (unsigned long)ftrace_free_records; | 344 | rec->freelist = ftrace_free_records; |
343 | ftrace_free_records = rec; | 345 | ftrace_free_records = rec; |
344 | rec->flags |= FTRACE_FL_FREE; | 346 | rec->flags |= FTRACE_FL_FREE; |
345 | } | 347 | } |
@@ -356,9 +358,14 @@ void ftrace_release(void *start, unsigned long size) | |||
356 | 358 | ||
357 | mutex_lock(&ftrace_lock); | 359 | mutex_lock(&ftrace_lock); |
358 | do_for_each_ftrace_rec(pg, rec) { | 360 | do_for_each_ftrace_rec(pg, rec) { |
359 | if ((rec->ip >= s) && (rec->ip < e) && | 361 | if ((rec->ip >= s) && (rec->ip < e)) { |
360 | !(rec->flags & FTRACE_FL_FREE)) | 362 | /* |
363 | * rec->ip is changed in ftrace_free_rec() | ||
364 | * It should not between s and e if record was freed. | ||
365 | */ | ||
366 | FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); | ||
361 | ftrace_free_rec(rec); | 367 | ftrace_free_rec(rec); |
368 | } | ||
362 | } while_for_each_ftrace_rec(); | 369 | } while_for_each_ftrace_rec(); |
363 | mutex_unlock(&ftrace_lock); | 370 | mutex_unlock(&ftrace_lock); |
364 | } | 371 | } |
@@ -377,7 +384,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) | |||
377 | return NULL; | 384 | return NULL; |
378 | } | 385 | } |
379 | 386 | ||
380 | ftrace_free_records = (void *)rec->ip; | 387 | ftrace_free_records = rec->freelist; |
381 | memset(rec, 0, sizeof(*rec)); | 388 | memset(rec, 0, sizeof(*rec)); |
382 | return rec; | 389 | return rec; |
383 | } | 390 | } |
@@ -409,7 +416,7 @@ ftrace_record_ip(unsigned long ip) | |||
409 | return NULL; | 416 | return NULL; |
410 | 417 | ||
411 | rec->ip = ip; | 418 | rec->ip = ip; |
412 | rec->flags = (unsigned long)ftrace_new_addrs; | 419 | rec->newlist = ftrace_new_addrs; |
413 | ftrace_new_addrs = rec; | 420 | ftrace_new_addrs = rec; |
414 | 421 | ||
415 | return rec; | 422 | return rec; |
@@ -729,7 +736,7 @@ static int ftrace_update_code(struct module *mod) | |||
729 | return -1; | 736 | return -1; |
730 | 737 | ||
731 | p = ftrace_new_addrs; | 738 | p = ftrace_new_addrs; |
732 | ftrace_new_addrs = (struct dyn_ftrace *)p->flags; | 739 | ftrace_new_addrs = p->newlist; |
733 | p->flags = 0L; | 740 | p->flags = 0L; |
734 | 741 | ||
735 | /* convert record (i.e, patch mcount-call with NOP) */ | 742 | /* convert record (i.e, patch mcount-call with NOP) */ |
@@ -2262,7 +2269,7 @@ ftrace_pid_read(struct file *file, char __user *ubuf, | |||
2262 | if (ftrace_pid_trace == ftrace_swapper_pid) | 2269 | if (ftrace_pid_trace == ftrace_swapper_pid) |
2263 | r = sprintf(buf, "swapper tasks\n"); | 2270 | r = sprintf(buf, "swapper tasks\n"); |
2264 | else if (ftrace_pid_trace) | 2271 | else if (ftrace_pid_trace) |
2265 | r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); | 2272 | r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); |
2266 | else | 2273 | else |
2267 | r = sprintf(buf, "no pid\n"); | 2274 | r = sprintf(buf, "no pid\n"); |
2268 | 2275 | ||
@@ -2590,6 +2597,38 @@ free: | |||
2590 | return ret; | 2597 | return ret; |
2591 | } | 2598 | } |
2592 | 2599 | ||
2600 | static void | ||
2601 | ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, | ||
2602 | struct task_struct *next) | ||
2603 | { | ||
2604 | unsigned long long timestamp; | ||
2605 | int index; | ||
2606 | |||
2607 | /* | ||
2608 | * Does the user want to count the time a function was asleep? | ||
2609 | * If so, do not update the time stamps. | ||
2610 | */ | ||
2611 | if (trace_flags & TRACE_ITER_SLEEP_TIME) | ||
2612 | return; | ||
2613 | |||
2614 | timestamp = trace_clock_local(); | ||
2615 | |||
2616 | prev->ftrace_timestamp = timestamp; | ||
2617 | |||
2618 | /* only process tasks that we timestamped */ | ||
2619 | if (!next->ftrace_timestamp) | ||
2620 | return; | ||
2621 | |||
2622 | /* | ||
2623 | * Update all the counters in next to make up for the | ||
2624 | * time next was sleeping. | ||
2625 | */ | ||
2626 | timestamp -= next->ftrace_timestamp; | ||
2627 | |||
2628 | for (index = next->curr_ret_stack; index >= 0; index--) | ||
2629 | next->ret_stack[index].calltime += timestamp; | ||
2630 | } | ||
2631 | |||
2593 | /* Allocate a return stack for each task */ | 2632 | /* Allocate a return stack for each task */ |
2594 | static int start_graph_tracing(void) | 2633 | static int start_graph_tracing(void) |
2595 | { | 2634 | { |
@@ -2611,6 +2650,13 @@ static int start_graph_tracing(void) | |||
2611 | ret = alloc_retstack_tasklist(ret_stack_list); | 2650 | ret = alloc_retstack_tasklist(ret_stack_list); |
2612 | } while (ret == -EAGAIN); | 2651 | } while (ret == -EAGAIN); |
2613 | 2652 | ||
2653 | if (!ret) { | ||
2654 | ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); | ||
2655 | if (ret) | ||
2656 | pr_info("ftrace_graph: Couldn't activate tracepoint" | ||
2657 | " probe to kernel_sched_switch\n"); | ||
2658 | } | ||
2659 | |||
2614 | kfree(ret_stack_list); | 2660 | kfree(ret_stack_list); |
2615 | return ret; | 2661 | return ret; |
2616 | } | 2662 | } |
@@ -2643,6 +2689,12 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, | |||
2643 | 2689 | ||
2644 | mutex_lock(&ftrace_lock); | 2690 | mutex_lock(&ftrace_lock); |
2645 | 2691 | ||
2692 | /* we currently allow only one tracer registered at a time */ | ||
2693 | if (atomic_read(&ftrace_graph_active)) { | ||
2694 | ret = -EBUSY; | ||
2695 | goto out; | ||
2696 | } | ||
2697 | |||
2646 | ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; | 2698 | ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; |
2647 | register_pm_notifier(&ftrace_suspend_notifier); | 2699 | register_pm_notifier(&ftrace_suspend_notifier); |
2648 | 2700 | ||
@@ -2668,6 +2720,7 @@ void unregister_ftrace_graph(void) | |||
2668 | mutex_lock(&ftrace_lock); | 2720 | mutex_lock(&ftrace_lock); |
2669 | 2721 | ||
2670 | atomic_dec(&ftrace_graph_active); | 2722 | atomic_dec(&ftrace_graph_active); |
2723 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); | ||
2671 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; | 2724 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; |
2672 | ftrace_graph_entry = ftrace_graph_entry_stub; | 2725 | ftrace_graph_entry = ftrace_graph_entry_stub; |
2673 | ftrace_shutdown(FTRACE_STOP_FUNC_RET); | 2726 | ftrace_shutdown(FTRACE_STOP_FUNC_RET); |
@@ -2688,6 +2741,7 @@ void ftrace_graph_init_task(struct task_struct *t) | |||
2688 | t->curr_ret_stack = -1; | 2741 | t->curr_ret_stack = -1; |
2689 | atomic_set(&t->tracing_graph_pause, 0); | 2742 | atomic_set(&t->tracing_graph_pause, 0); |
2690 | atomic_set(&t->trace_overrun, 0); | 2743 | atomic_set(&t->trace_overrun, 0); |
2744 | t->ftrace_timestamp = 0; | ||
2691 | } else | 2745 | } else |
2692 | t->ret_stack = NULL; | 2746 | t->ret_stack = NULL; |
2693 | } | 2747 | } |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 808b14bbf076..edce2ff38944 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -189,16 +189,65 @@ enum { | |||
189 | RB_LEN_TIME_STAMP = 16, | 189 | RB_LEN_TIME_STAMP = 16, |
190 | }; | 190 | }; |
191 | 191 | ||
192 | /* inline for ring buffer fast paths */ | 192 | static inline int rb_null_event(struct ring_buffer_event *event) |
193 | { | ||
194 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; | ||
195 | } | ||
196 | |||
197 | static inline int rb_discarded_event(struct ring_buffer_event *event) | ||
198 | { | ||
199 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta; | ||
200 | } | ||
201 | |||
202 | static void rb_event_set_padding(struct ring_buffer_event *event) | ||
203 | { | ||
204 | event->type = RINGBUF_TYPE_PADDING; | ||
205 | event->time_delta = 0; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * ring_buffer_event_discard - discard an event in the ring buffer | ||
210 | * @buffer: the ring buffer | ||
211 | * @event: the event to discard | ||
212 | * | ||
213 | * Sometimes an event that is in the ring buffer needs to be ignored. | ||
214 | * This function lets the user discard an event in the ring buffer | ||
215 | * and then that event will not be read later. | ||
216 | * | ||
217 | * Note, it is up to the user to be careful with this, and protect | ||
218 | * against races. If the user discards an event that has been consumed | ||
219 | * it is possible that it could corrupt the ring buffer. | ||
220 | */ | ||
221 | void ring_buffer_event_discard(struct ring_buffer_event *event) | ||
222 | { | ||
223 | event->type = RINGBUF_TYPE_PADDING; | ||
224 | /* time delta must be non zero */ | ||
225 | if (!event->time_delta) | ||
226 | event->time_delta = 1; | ||
227 | } | ||
228 | |||
193 | static unsigned | 229 | static unsigned |
194 | rb_event_length(struct ring_buffer_event *event) | 230 | rb_event_data_length(struct ring_buffer_event *event) |
195 | { | 231 | { |
196 | unsigned length; | 232 | unsigned length; |
197 | 233 | ||
234 | if (event->len) | ||
235 | length = event->len * RB_ALIGNMENT; | ||
236 | else | ||
237 | length = event->array[0]; | ||
238 | return length + RB_EVNT_HDR_SIZE; | ||
239 | } | ||
240 | |||
241 | /* inline for ring buffer fast paths */ | ||
242 | static unsigned | ||
243 | rb_event_length(struct ring_buffer_event *event) | ||
244 | { | ||
198 | switch (event->type) { | 245 | switch (event->type) { |
199 | case RINGBUF_TYPE_PADDING: | 246 | case RINGBUF_TYPE_PADDING: |
200 | /* undefined */ | 247 | if (rb_null_event(event)) |
201 | return -1; | 248 | /* undefined */ |
249 | return -1; | ||
250 | return rb_event_data_length(event); | ||
202 | 251 | ||
203 | case RINGBUF_TYPE_TIME_EXTEND: | 252 | case RINGBUF_TYPE_TIME_EXTEND: |
204 | return RB_LEN_TIME_EXTEND; | 253 | return RB_LEN_TIME_EXTEND; |
@@ -207,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event) | |||
207 | return RB_LEN_TIME_STAMP; | 256 | return RB_LEN_TIME_STAMP; |
208 | 257 | ||
209 | case RINGBUF_TYPE_DATA: | 258 | case RINGBUF_TYPE_DATA: |
210 | if (event->len) | 259 | return rb_event_data_length(event); |
211 | length = event->len * RB_ALIGNMENT; | ||
212 | else | ||
213 | length = event->array[0]; | ||
214 | return length + RB_EVNT_HDR_SIZE; | ||
215 | default: | 260 | default: |
216 | BUG(); | 261 | BUG(); |
217 | } | 262 | } |
@@ -845,11 +890,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
845 | } | 890 | } |
846 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | 891 | EXPORT_SYMBOL_GPL(ring_buffer_resize); |
847 | 892 | ||
848 | static inline int rb_null_event(struct ring_buffer_event *event) | ||
849 | { | ||
850 | return event->type == RINGBUF_TYPE_PADDING; | ||
851 | } | ||
852 | |||
853 | static inline void * | 893 | static inline void * |
854 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | 894 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) |
855 | { | 895 | { |
@@ -1219,7 +1259,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1219 | if (tail < BUF_PAGE_SIZE) { | 1259 | if (tail < BUF_PAGE_SIZE) { |
1220 | /* Mark the rest of the page with padding */ | 1260 | /* Mark the rest of the page with padding */ |
1221 | event = __rb_page_index(tail_page, tail); | 1261 | event = __rb_page_index(tail_page, tail); |
1222 | event->type = RINGBUF_TYPE_PADDING; | 1262 | rb_event_set_padding(event); |
1223 | } | 1263 | } |
1224 | 1264 | ||
1225 | if (tail <= BUF_PAGE_SIZE) | 1265 | if (tail <= BUF_PAGE_SIZE) |
@@ -1969,7 +2009,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
1969 | 2009 | ||
1970 | event = rb_reader_event(cpu_buffer); | 2010 | event = rb_reader_event(cpu_buffer); |
1971 | 2011 | ||
1972 | if (event->type == RINGBUF_TYPE_DATA) | 2012 | if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) |
1973 | cpu_buffer->entries--; | 2013 | cpu_buffer->entries--; |
1974 | 2014 | ||
1975 | rb_update_read_stamp(cpu_buffer, event); | 2015 | rb_update_read_stamp(cpu_buffer, event); |
@@ -2052,9 +2092,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2052 | 2092 | ||
2053 | switch (event->type) { | 2093 | switch (event->type) { |
2054 | case RINGBUF_TYPE_PADDING: | 2094 | case RINGBUF_TYPE_PADDING: |
2055 | RB_WARN_ON(cpu_buffer, 1); | 2095 | if (rb_null_event(event)) |
2096 | RB_WARN_ON(cpu_buffer, 1); | ||
2097 | /* | ||
2098 | * Because the writer could be discarding every | ||
2099 | * event it creates (which would probably be bad) | ||
2100 | * if we were to go back to "again" then we may never | ||
2101 | * catch up, and will trigger the warn on, or lock | ||
2102 | * the box. Return the padding, and we will release | ||
2103 | * the current locks, and try again. | ||
2104 | */ | ||
2056 | rb_advance_reader(cpu_buffer); | 2105 | rb_advance_reader(cpu_buffer); |
2057 | return NULL; | 2106 | return event; |
2058 | 2107 | ||
2059 | case RINGBUF_TYPE_TIME_EXTEND: | 2108 | case RINGBUF_TYPE_TIME_EXTEND: |
2060 | /* Internal data, OK to advance */ | 2109 | /* Internal data, OK to advance */ |
@@ -2115,8 +2164,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2115 | 2164 | ||
2116 | switch (event->type) { | 2165 | switch (event->type) { |
2117 | case RINGBUF_TYPE_PADDING: | 2166 | case RINGBUF_TYPE_PADDING: |
2118 | rb_inc_iter(iter); | 2167 | if (rb_null_event(event)) { |
2119 | goto again; | 2168 | rb_inc_iter(iter); |
2169 | goto again; | ||
2170 | } | ||
2171 | rb_advance_iter(iter); | ||
2172 | return event; | ||
2120 | 2173 | ||
2121 | case RINGBUF_TYPE_TIME_EXTEND: | 2174 | case RINGBUF_TYPE_TIME_EXTEND: |
2122 | /* Internal data, OK to advance */ | 2175 | /* Internal data, OK to advance */ |
@@ -2163,10 +2216,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2163 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2216 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2164 | return NULL; | 2217 | return NULL; |
2165 | 2218 | ||
2219 | again: | ||
2166 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2220 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2167 | event = rb_buffer_peek(buffer, cpu, ts); | 2221 | event = rb_buffer_peek(buffer, cpu, ts); |
2168 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2222 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2169 | 2223 | ||
2224 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2225 | cpu_relax(); | ||
2226 | goto again; | ||
2227 | } | ||
2228 | |||
2170 | return event; | 2229 | return event; |
2171 | } | 2230 | } |
2172 | 2231 | ||
@@ -2185,10 +2244,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2185 | struct ring_buffer_event *event; | 2244 | struct ring_buffer_event *event; |
2186 | unsigned long flags; | 2245 | unsigned long flags; |
2187 | 2246 | ||
2247 | again: | ||
2188 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2248 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2189 | event = rb_iter_peek(iter, ts); | 2249 | event = rb_iter_peek(iter, ts); |
2190 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2250 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2191 | 2251 | ||
2252 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2253 | cpu_relax(); | ||
2254 | goto again; | ||
2255 | } | ||
2256 | |||
2192 | return event; | 2257 | return event; |
2193 | } | 2258 | } |
2194 | 2259 | ||
@@ -2207,6 +2272,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2207 | struct ring_buffer_event *event = NULL; | 2272 | struct ring_buffer_event *event = NULL; |
2208 | unsigned long flags; | 2273 | unsigned long flags; |
2209 | 2274 | ||
2275 | again: | ||
2210 | /* might be called in atomic */ | 2276 | /* might be called in atomic */ |
2211 | preempt_disable(); | 2277 | preempt_disable(); |
2212 | 2278 | ||
@@ -2228,6 +2294,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2228 | out: | 2294 | out: |
2229 | preempt_enable(); | 2295 | preempt_enable(); |
2230 | 2296 | ||
2297 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2298 | cpu_relax(); | ||
2299 | goto again; | ||
2300 | } | ||
2301 | |||
2231 | return event; | 2302 | return event; |
2232 | } | 2303 | } |
2233 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 2304 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
@@ -2306,6 +2377,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2306 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 2377 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
2307 | unsigned long flags; | 2378 | unsigned long flags; |
2308 | 2379 | ||
2380 | again: | ||
2309 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2381 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2310 | event = rb_iter_peek(iter, ts); | 2382 | event = rb_iter_peek(iter, ts); |
2311 | if (!event) | 2383 | if (!event) |
@@ -2315,6 +2387,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2315 | out: | 2387 | out: |
2316 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2388 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2317 | 2389 | ||
2390 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
2391 | cpu_relax(); | ||
2392 | goto again; | ||
2393 | } | ||
2394 | |||
2318 | return event; | 2395 | return event; |
2319 | } | 2396 | } |
2320 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 2397 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6fac0ffe6f0..a0174a40c563 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -255,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | |||
255 | 255 | ||
256 | /* trace_flags holds trace_options default values */ | 256 | /* trace_flags holds trace_options default values */ |
257 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | 257 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | |
258 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; | 258 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; |
259 | 259 | ||
260 | /** | 260 | /** |
261 | * trace_wake_up - wake up tasks waiting for trace input | 261 | * trace_wake_up - wake up tasks waiting for trace input |
@@ -316,6 +316,7 @@ static const char *trace_options[] = { | |||
316 | "context-info", | 316 | "context-info", |
317 | "latency-format", | 317 | "latency-format", |
318 | "global-clock", | 318 | "global-clock", |
319 | "sleep-time", | ||
319 | NULL | 320 | NULL |
320 | }; | 321 | }; |
321 | 322 | ||
@@ -382,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) | |||
382 | return cnt; | 383 | return cnt; |
383 | } | 384 | } |
384 | 385 | ||
385 | ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) | 386 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) |
386 | { | 387 | { |
387 | int len; | 388 | int len; |
388 | void *ret; | 389 | void *ret; |
@@ -860,15 +861,25 @@ static void ftrace_trace_stack(struct trace_array *tr, | |||
860 | static void ftrace_trace_userstack(struct trace_array *tr, | 861 | static void ftrace_trace_userstack(struct trace_array *tr, |
861 | unsigned long flags, int pc); | 862 | unsigned long flags, int pc); |
862 | 863 | ||
863 | void trace_buffer_unlock_commit(struct trace_array *tr, | 864 | static inline void __trace_buffer_unlock_commit(struct trace_array *tr, |
864 | struct ring_buffer_event *event, | 865 | struct ring_buffer_event *event, |
865 | unsigned long flags, int pc) | 866 | unsigned long flags, int pc, |
867 | int wake) | ||
866 | { | 868 | { |
867 | ring_buffer_unlock_commit(tr->buffer, event); | 869 | ring_buffer_unlock_commit(tr->buffer, event); |
868 | 870 | ||
869 | ftrace_trace_stack(tr, flags, 6, pc); | 871 | ftrace_trace_stack(tr, flags, 6, pc); |
870 | ftrace_trace_userstack(tr, flags, pc); | 872 | ftrace_trace_userstack(tr, flags, pc); |
871 | trace_wake_up(); | 873 | |
874 | if (wake) | ||
875 | trace_wake_up(); | ||
876 | } | ||
877 | |||
878 | void trace_buffer_unlock_commit(struct trace_array *tr, | ||
879 | struct ring_buffer_event *event, | ||
880 | unsigned long flags, int pc) | ||
881 | { | ||
882 | __trace_buffer_unlock_commit(tr, event, flags, pc, 1); | ||
872 | } | 883 | } |
873 | 884 | ||
874 | struct ring_buffer_event * | 885 | struct ring_buffer_event * |
@@ -882,7 +893,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, | |||
882 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, | 893 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, |
883 | unsigned long flags, int pc) | 894 | unsigned long flags, int pc) |
884 | { | 895 | { |
885 | return trace_buffer_unlock_commit(&global_trace, event, flags, pc); | 896 | return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); |
897 | } | ||
898 | |||
899 | void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, | ||
900 | unsigned long flags, int pc) | ||
901 | { | ||
902 | return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); | ||
886 | } | 903 | } |
887 | 904 | ||
888 | void | 905 | void |
@@ -908,7 +925,7 @@ trace_function(struct trace_array *tr, | |||
908 | } | 925 | } |
909 | 926 | ||
910 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 927 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
911 | static void __trace_graph_entry(struct trace_array *tr, | 928 | static int __trace_graph_entry(struct trace_array *tr, |
912 | struct ftrace_graph_ent *trace, | 929 | struct ftrace_graph_ent *trace, |
913 | unsigned long flags, | 930 | unsigned long flags, |
914 | int pc) | 931 | int pc) |
@@ -917,15 +934,17 @@ static void __trace_graph_entry(struct trace_array *tr, | |||
917 | struct ftrace_graph_ent_entry *entry; | 934 | struct ftrace_graph_ent_entry *entry; |
918 | 935 | ||
919 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | 936 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) |
920 | return; | 937 | return 0; |
921 | 938 | ||
922 | event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, | 939 | event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, |
923 | sizeof(*entry), flags, pc); | 940 | sizeof(*entry), flags, pc); |
924 | if (!event) | 941 | if (!event) |
925 | return; | 942 | return 0; |
926 | entry = ring_buffer_event_data(event); | 943 | entry = ring_buffer_event_data(event); |
927 | entry->graph_ent = *trace; | 944 | entry->graph_ent = *trace; |
928 | ring_buffer_unlock_commit(global_trace.buffer, event); | 945 | ring_buffer_unlock_commit(global_trace.buffer, event); |
946 | |||
947 | return 1; | ||
929 | } | 948 | } |
930 | 949 | ||
931 | static void __trace_graph_return(struct trace_array *tr, | 950 | static void __trace_graph_return(struct trace_array *tr, |
@@ -1146,6 +1165,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
1146 | struct trace_array_cpu *data; | 1165 | struct trace_array_cpu *data; |
1147 | unsigned long flags; | 1166 | unsigned long flags; |
1148 | long disabled; | 1167 | long disabled; |
1168 | int ret; | ||
1149 | int cpu; | 1169 | int cpu; |
1150 | int pc; | 1170 | int pc; |
1151 | 1171 | ||
@@ -1161,15 +1181,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
1161 | disabled = atomic_inc_return(&data->disabled); | 1181 | disabled = atomic_inc_return(&data->disabled); |
1162 | if (likely(disabled == 1)) { | 1182 | if (likely(disabled == 1)) { |
1163 | pc = preempt_count(); | 1183 | pc = preempt_count(); |
1164 | __trace_graph_entry(tr, trace, flags, pc); | 1184 | ret = __trace_graph_entry(tr, trace, flags, pc); |
1185 | } else { | ||
1186 | ret = 0; | ||
1165 | } | 1187 | } |
1166 | /* Only do the atomic if it is not already set */ | 1188 | /* Only do the atomic if it is not already set */ |
1167 | if (!test_tsk_trace_graph(current)) | 1189 | if (!test_tsk_trace_graph(current)) |
1168 | set_tsk_trace_graph(current); | 1190 | set_tsk_trace_graph(current); |
1191 | |||
1169 | atomic_dec(&data->disabled); | 1192 | atomic_dec(&data->disabled); |
1170 | local_irq_restore(flags); | 1193 | local_irq_restore(flags); |
1171 | 1194 | ||
1172 | return 1; | 1195 | return ret; |
1173 | } | 1196 | } |
1174 | 1197 | ||
1175 | void trace_graph_return(struct ftrace_graph_ret *trace) | 1198 | void trace_graph_return(struct ftrace_graph_ret *trace) |
@@ -3513,6 +3536,9 @@ struct dentry *tracing_init_dentry(void) | |||
3513 | if (d_tracer) | 3536 | if (d_tracer) |
3514 | return d_tracer; | 3537 | return d_tracer; |
3515 | 3538 | ||
3539 | if (!debugfs_initialized()) | ||
3540 | return NULL; | ||
3541 | |||
3516 | d_tracer = debugfs_create_dir("tracing", NULL); | 3542 | d_tracer = debugfs_create_dir("tracing", NULL); |
3517 | 3543 | ||
3518 | if (!d_tracer && !once) { | 3544 | if (!d_tracer && !once) { |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be200..cb0ce3fc36d3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -483,6 +483,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, | |||
483 | unsigned long flags, int pc); | 483 | unsigned long flags, int pc); |
484 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, | 484 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, |
485 | unsigned long flags, int pc); | 485 | unsigned long flags, int pc); |
486 | void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, | ||
487 | unsigned long flags, int pc); | ||
486 | 488 | ||
487 | struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, | 489 | struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, |
488 | struct trace_array_cpu *data); | 490 | struct trace_array_cpu *data); |
@@ -683,6 +685,7 @@ enum trace_iterator_flags { | |||
683 | TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ | 685 | TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ |
684 | TRACE_ITER_LATENCY_FMT = 0x40000, | 686 | TRACE_ITER_LATENCY_FMT = 0x40000, |
685 | TRACE_ITER_GLOBAL_CLK = 0x80000, | 687 | TRACE_ITER_GLOBAL_CLK = 0x80000, |
688 | TRACE_ITER_SLEEP_TIME = 0x100000, | ||
686 | }; | 689 | }; |
687 | 690 | ||
688 | /* | 691 | /* |
@@ -775,16 +778,27 @@ enum { | |||
775 | TRACE_EVENT_TYPE_RAW = 2, | 778 | TRACE_EVENT_TYPE_RAW = 2, |
776 | }; | 779 | }; |
777 | 780 | ||
781 | struct ftrace_event_field { | ||
782 | struct list_head link; | ||
783 | char *name; | ||
784 | char *type; | ||
785 | int offset; | ||
786 | int size; | ||
787 | }; | ||
788 | |||
778 | struct ftrace_event_call { | 789 | struct ftrace_event_call { |
779 | char *name; | 790 | char *name; |
780 | char *system; | 791 | char *system; |
781 | struct dentry *dir; | 792 | struct dentry *dir; |
782 | int enabled; | 793 | int enabled; |
783 | int (*regfunc)(void); | 794 | int (*regfunc)(void); |
784 | void (*unregfunc)(void); | 795 | void (*unregfunc)(void); |
785 | int id; | 796 | int id; |
786 | int (*raw_init)(void); | 797 | int (*raw_init)(void); |
787 | int (*show_format)(struct trace_seq *s); | 798 | int (*show_format)(struct trace_seq *s); |
799 | int (*define_fields)(void); | ||
800 | struct list_head fields; | ||
801 | struct filter_pred **preds; | ||
788 | 802 | ||
789 | #ifdef CONFIG_EVENT_PROFILE | 803 | #ifdef CONFIG_EVENT_PROFILE |
790 | atomic_t profile_count; | 804 | atomic_t profile_count; |
@@ -793,6 +807,51 @@ struct ftrace_event_call { | |||
793 | #endif | 807 | #endif |
794 | }; | 808 | }; |
795 | 809 | ||
810 | struct event_subsystem { | ||
811 | struct list_head list; | ||
812 | const char *name; | ||
813 | struct dentry *entry; | ||
814 | struct filter_pred **preds; | ||
815 | }; | ||
816 | |||
/*
 * Walk 'event' over every entry from __start_ftrace_events up to (but not
 * including) __stop_ftrace_events.
 *
 * 'event' must be a modifiable pointer lvalue.  It is parenthesized on every
 * use so that an argument such as '*pp' is cast and incremented as a whole
 * ('(*pp)++') rather than being re-associated by operator precedence
 * ('*pp++').
 */
#define events_for_each(event)						\
	for ((event) = __start_ftrace_events;				\
	     (unsigned long)(event) < (unsigned long)__stop_ftrace_events; \
	     (event)++)
821 | |||
822 | #define MAX_FILTER_PRED 8 | ||
823 | |||
824 | struct filter_pred; | ||
825 | |||
826 | typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); | ||
827 | |||
828 | struct filter_pred { | ||
829 | filter_pred_fn_t fn; | ||
830 | u64 val; | ||
831 | char *str_val; | ||
832 | int str_len; | ||
833 | char *field_name; | ||
834 | int offset; | ||
835 | int not; | ||
836 | int or; | ||
837 | int compound; | ||
838 | int clear; | ||
839 | }; | ||
840 | |||
/* Event field registration (trace_events.c) */
int trace_define_field(struct ftrace_event_call *call, char *type,
		       char *name, int offset, int size);

/* Predicate parsing, printing and release (trace_events_filter.c) */
extern int filter_parse(char **pbuf, struct filter_pred *pred);
extern void filter_print_preds(struct filter_pred **preds,
			       struct trace_seq *s);
extern void filter_free_pred(struct filter_pred *pred);

/* Per-event filters */
extern int filter_add_pred(struct ftrace_event_call *call,
			   struct filter_pred *pred);
extern void filter_free_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec);

/* Per-subsystem filters */
extern int filter_add_subsystem_pred(struct event_subsystem *system,
				     struct filter_pred *pred);
extern void filter_free_subsystem_preds(struct event_subsystem *system);
854 | |||
796 | void event_trace_printk(unsigned long ip, const char *fmt, ...); | 855 | void event_trace_printk(unsigned long ip, const char *fmt, ...); |
797 | extern struct ftrace_event_call __start_ftrace_events[]; | 856 | extern struct ftrace_event_call __start_ftrace_events[]; |
798 | extern struct ftrace_event_call __stop_ftrace_events[]; | 857 | extern struct ftrace_event_call __stop_ftrace_events[]; |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 05b176abfd30..b588fd81f7f9 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/ktime.h> | 20 | #include <linux/ktime.h> |
21 | #include <linux/trace_clock.h> | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * trace_clock_local(): the simplest and least coherent tracing clock. | 24 | * trace_clock_local(): the simplest and least coherent tracing clock. |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3047b56f6637..64ec4d278ffb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -19,6 +19,39 @@ | |||
19 | 19 | ||
20 | static DEFINE_MUTEX(event_mutex); | 20 | static DEFINE_MUTEX(event_mutex); |
21 | 21 | ||
22 | int trace_define_field(struct ftrace_event_call *call, char *type, | ||
23 | char *name, int offset, int size) | ||
24 | { | ||
25 | struct ftrace_event_field *field; | ||
26 | |||
27 | field = kzalloc(sizeof(*field), GFP_KERNEL); | ||
28 | if (!field) | ||
29 | goto err; | ||
30 | |||
31 | field->name = kstrdup(name, GFP_KERNEL); | ||
32 | if (!field->name) | ||
33 | goto err; | ||
34 | |||
35 | field->type = kstrdup(type, GFP_KERNEL); | ||
36 | if (!field->type) | ||
37 | goto err; | ||
38 | |||
39 | field->offset = offset; | ||
40 | field->size = size; | ||
41 | list_add(&field->link, &call->fields); | ||
42 | |||
43 | return 0; | ||
44 | |||
45 | err: | ||
46 | if (field) { | ||
47 | kfree(field->name); | ||
48 | kfree(field->type); | ||
49 | } | ||
50 | kfree(field); | ||
51 | |||
52 | return -ENOMEM; | ||
53 | } | ||
54 | |||
22 | static void ftrace_clear_events(void) | 55 | static void ftrace_clear_events(void) |
23 | { | 56 | { |
24 | struct ftrace_event_call *call = (void *)__start_ftrace_events; | 57 | struct ftrace_event_call *call = (void *)__start_ftrace_events; |
@@ -343,7 +376,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
343 | 376 | ||
344 | #undef FIELD | 377 | #undef FIELD |
345 | #define FIELD(type, name) \ | 378 | #define FIELD(type, name) \ |
346 | #type, #name, offsetof(typeof(field), name), sizeof(field.name) | 379 | #type, "common_" #name, offsetof(typeof(field), name), \ |
380 | sizeof(field.name) | ||
347 | 381 | ||
348 | static int trace_write_header(struct trace_seq *s) | 382 | static int trace_write_header(struct trace_seq *s) |
349 | { | 383 | { |
@@ -430,6 +464,139 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | |||
430 | return r; | 464 | return r; |
431 | } | 465 | } |
432 | 466 | ||
467 | static ssize_t | ||
468 | event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | ||
469 | loff_t *ppos) | ||
470 | { | ||
471 | struct ftrace_event_call *call = filp->private_data; | ||
472 | struct trace_seq *s; | ||
473 | int r; | ||
474 | |||
475 | if (*ppos) | ||
476 | return 0; | ||
477 | |||
478 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
479 | if (!s) | ||
480 | return -ENOMEM; | ||
481 | |||
482 | trace_seq_init(s); | ||
483 | |||
484 | filter_print_preds(call->preds, s); | ||
485 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | ||
486 | |||
487 | kfree(s); | ||
488 | |||
489 | return r; | ||
490 | } | ||
491 | |||
492 | static ssize_t | ||
493 | event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | ||
494 | loff_t *ppos) | ||
495 | { | ||
496 | struct ftrace_event_call *call = filp->private_data; | ||
497 | char buf[64], *pbuf = buf; | ||
498 | struct filter_pred *pred; | ||
499 | int err; | ||
500 | |||
501 | if (cnt >= sizeof(buf)) | ||
502 | return -EINVAL; | ||
503 | |||
504 | if (copy_from_user(&buf, ubuf, cnt)) | ||
505 | return -EFAULT; | ||
506 | |||
507 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | ||
508 | if (!pred) | ||
509 | return -ENOMEM; | ||
510 | |||
511 | err = filter_parse(&pbuf, pred); | ||
512 | if (err < 0) { | ||
513 | filter_free_pred(pred); | ||
514 | return err; | ||
515 | } | ||
516 | |||
517 | if (pred->clear) { | ||
518 | filter_free_preds(call); | ||
519 | filter_free_pred(pred); | ||
520 | return cnt; | ||
521 | } | ||
522 | |||
523 | if (filter_add_pred(call, pred)) { | ||
524 | filter_free_pred(pred); | ||
525 | return -EINVAL; | ||
526 | } | ||
527 | |||
528 | *ppos += cnt; | ||
529 | |||
530 | return cnt; | ||
531 | } | ||
532 | |||
533 | static ssize_t | ||
534 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | ||
535 | loff_t *ppos) | ||
536 | { | ||
537 | struct event_subsystem *system = filp->private_data; | ||
538 | struct trace_seq *s; | ||
539 | int r; | ||
540 | |||
541 | if (*ppos) | ||
542 | return 0; | ||
543 | |||
544 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
545 | if (!s) | ||
546 | return -ENOMEM; | ||
547 | |||
548 | trace_seq_init(s); | ||
549 | |||
550 | filter_print_preds(system->preds, s); | ||
551 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | ||
552 | |||
553 | kfree(s); | ||
554 | |||
555 | return r; | ||
556 | } | ||
557 | |||
558 | static ssize_t | ||
559 | subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | ||
560 | loff_t *ppos) | ||
561 | { | ||
562 | struct event_subsystem *system = filp->private_data; | ||
563 | char buf[64], *pbuf = buf; | ||
564 | struct filter_pred *pred; | ||
565 | int err; | ||
566 | |||
567 | if (cnt >= sizeof(buf)) | ||
568 | return -EINVAL; | ||
569 | |||
570 | if (copy_from_user(&buf, ubuf, cnt)) | ||
571 | return -EFAULT; | ||
572 | |||
573 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | ||
574 | if (!pred) | ||
575 | return -ENOMEM; | ||
576 | |||
577 | err = filter_parse(&pbuf, pred); | ||
578 | if (err < 0) { | ||
579 | filter_free_pred(pred); | ||
580 | return err; | ||
581 | } | ||
582 | |||
583 | if (pred->clear) { | ||
584 | filter_free_subsystem_preds(system); | ||
585 | filter_free_pred(pred); | ||
586 | return cnt; | ||
587 | } | ||
588 | |||
589 | if (filter_add_subsystem_pred(system, pred)) { | ||
590 | filter_free_subsystem_preds(system); | ||
591 | filter_free_pred(pred); | ||
592 | return -EINVAL; | ||
593 | } | ||
594 | |||
595 | *ppos += cnt; | ||
596 | |||
597 | return cnt; | ||
598 | } | ||
599 | |||
433 | static const struct seq_operations show_event_seq_ops = { | 600 | static const struct seq_operations show_event_seq_ops = { |
434 | .start = t_start, | 601 | .start = t_start, |
435 | .next = t_next, | 602 | .next = t_next, |
@@ -475,6 +642,18 @@ static const struct file_operations ftrace_event_id_fops = { | |||
475 | .read = event_id_read, | 642 | .read = event_id_read, |
476 | }; | 643 | }; |
477 | 644 | ||
645 | static const struct file_operations ftrace_event_filter_fops = { | ||
646 | .open = tracing_open_generic, | ||
647 | .read = event_filter_read, | ||
648 | .write = event_filter_write, | ||
649 | }; | ||
650 | |||
651 | static const struct file_operations ftrace_subsystem_filter_fops = { | ||
652 | .open = tracing_open_generic, | ||
653 | .read = subsystem_filter_read, | ||
654 | .write = subsystem_filter_write, | ||
655 | }; | ||
656 | |||
478 | static struct dentry *event_trace_events_dir(void) | 657 | static struct dentry *event_trace_events_dir(void) |
479 | { | 658 | { |
480 | static struct dentry *d_tracer; | 659 | static struct dentry *d_tracer; |
@@ -495,12 +674,6 @@ static struct dentry *event_trace_events_dir(void) | |||
495 | return d_events; | 674 | return d_events; |
496 | } | 675 | } |
497 | 676 | ||
498 | struct event_subsystem { | ||
499 | struct list_head list; | ||
500 | const char *name; | ||
501 | struct dentry *entry; | ||
502 | }; | ||
503 | |||
504 | static LIST_HEAD(event_subsystems); | 677 | static LIST_HEAD(event_subsystems); |
505 | 678 | ||
506 | static struct dentry * | 679 | static struct dentry * |
@@ -533,6 +706,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
533 | system->name = name; | 706 | system->name = name; |
534 | list_add(&system->list, &event_subsystems); | 707 | list_add(&system->list, &event_subsystems); |
535 | 708 | ||
709 | system->preds = NULL; | ||
710 | |||
536 | return system->entry; | 711 | return system->entry; |
537 | } | 712 | } |
538 | 713 | ||
@@ -581,6 +756,20 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) | |||
581 | call->name); | 756 | call->name); |
582 | } | 757 | } |
583 | 758 | ||
759 | if (call->define_fields) { | ||
760 | ret = call->define_fields(); | ||
761 | if (ret < 0) { | ||
762 | pr_warning("Could not initialize trace point" | ||
763 | " events/%s\n", call->name); | ||
764 | return ret; | ||
765 | } | ||
766 | entry = debugfs_create_file("filter", 0644, call->dir, call, | ||
767 | &ftrace_event_filter_fops); | ||
768 | if (!entry) | ||
769 | pr_warning("Could not create debugfs " | ||
770 | "'%s/filter' entry\n", call->name); | ||
771 | } | ||
772 | |||
584 | /* A trace may not want to export its format */ | 773 | /* A trace may not want to export its format */ |
585 | if (!call->show_format) | 774 | if (!call->show_format) |
586 | return 0; | 775 | return 0; |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c new file mode 100644 index 000000000000..026be412f356 --- /dev/null +++ b/kernel/trace/trace_events_filter.c | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * trace_events_filter - generic event filtering | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> | ||
19 | */ | ||
20 | |||
21 | #include <linux/debugfs.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/ctype.h> | ||
25 | |||
26 | #include "trace.h" | ||
27 | #include "trace_output.h" | ||
28 | |||
29 | static int filter_pred_64(struct filter_pred *pred, void *event) | ||
30 | { | ||
31 | u64 *addr = (u64 *)(event + pred->offset); | ||
32 | u64 val = (u64)pred->val; | ||
33 | int match; | ||
34 | |||
35 | match = (val == *addr) ^ pred->not; | ||
36 | |||
37 | return match; | ||
38 | } | ||
39 | |||
40 | static int filter_pred_32(struct filter_pred *pred, void *event) | ||
41 | { | ||
42 | u32 *addr = (u32 *)(event + pred->offset); | ||
43 | u32 val = (u32)pred->val; | ||
44 | int match; | ||
45 | |||
46 | match = (val == *addr) ^ pred->not; | ||
47 | |||
48 | return match; | ||
49 | } | ||
50 | |||
51 | static int filter_pred_16(struct filter_pred *pred, void *event) | ||
52 | { | ||
53 | u16 *addr = (u16 *)(event + pred->offset); | ||
54 | u16 val = (u16)pred->val; | ||
55 | int match; | ||
56 | |||
57 | match = (val == *addr) ^ pred->not; | ||
58 | |||
59 | return match; | ||
60 | } | ||
61 | |||
62 | static int filter_pred_8(struct filter_pred *pred, void *event) | ||
63 | { | ||
64 | u8 *addr = (u8 *)(event + pred->offset); | ||
65 | u8 val = (u8)pred->val; | ||
66 | int match; | ||
67 | |||
68 | match = (val == *addr) ^ pred->not; | ||
69 | |||
70 | return match; | ||
71 | } | ||
72 | |||
73 | static int filter_pred_string(struct filter_pred *pred, void *event) | ||
74 | { | ||
75 | char *addr = (char *)(event + pred->offset); | ||
76 | int cmp, match; | ||
77 | |||
78 | cmp = strncmp(addr, pred->str_val, pred->str_len); | ||
79 | |||
80 | match = (!cmp) ^ pred->not; | ||
81 | |||
82 | return match; | ||
83 | } | ||
84 | |||
85 | /* return 1 if event matches, 0 otherwise (discard) */ | ||
86 | int filter_match_preds(struct ftrace_event_call *call, void *rec) | ||
87 | { | ||
88 | int i, matched, and_failed = 0; | ||
89 | struct filter_pred *pred; | ||
90 | |||
91 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
92 | if (call->preds[i]) { | ||
93 | pred = call->preds[i]; | ||
94 | if (and_failed && !pred->or) | ||
95 | continue; | ||
96 | matched = pred->fn(pred, rec); | ||
97 | if (!matched && !pred->or) { | ||
98 | and_failed = 1; | ||
99 | continue; | ||
100 | } else if (matched && pred->or) | ||
101 | return 1; | ||
102 | } else | ||
103 | break; | ||
104 | } | ||
105 | |||
106 | if (and_failed) | ||
107 | return 0; | ||
108 | |||
109 | return 1; | ||
110 | } | ||
111 | |||
112 | void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) | ||
113 | { | ||
114 | char *field_name; | ||
115 | struct filter_pred *pred; | ||
116 | int i; | ||
117 | |||
118 | if (!preds) { | ||
119 | trace_seq_printf(s, "none\n"); | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
124 | if (preds[i]) { | ||
125 | pred = preds[i]; | ||
126 | field_name = pred->field_name; | ||
127 | if (i) | ||
128 | trace_seq_printf(s, pred->or ? "|| " : "&& "); | ||
129 | trace_seq_printf(s, "%s ", field_name); | ||
130 | trace_seq_printf(s, pred->not ? "!= " : "== "); | ||
131 | if (pred->str_val) | ||
132 | trace_seq_printf(s, "%s\n", pred->str_val); | ||
133 | else | ||
134 | trace_seq_printf(s, "%llu\n", pred->val); | ||
135 | } else | ||
136 | break; | ||
137 | } | ||
138 | } | ||
139 | |||
140 | static struct ftrace_event_field * | ||
141 | find_event_field(struct ftrace_event_call *call, char *name) | ||
142 | { | ||
143 | struct ftrace_event_field *field; | ||
144 | |||
145 | list_for_each_entry(field, &call->fields, link) { | ||
146 | if (!strcmp(field->name, name)) | ||
147 | return field; | ||
148 | } | ||
149 | |||
150 | return NULL; | ||
151 | } | ||
152 | |||
153 | void filter_free_pred(struct filter_pred *pred) | ||
154 | { | ||
155 | if (!pred) | ||
156 | return; | ||
157 | |||
158 | kfree(pred->field_name); | ||
159 | kfree(pred->str_val); | ||
160 | kfree(pred); | ||
161 | } | ||
162 | |||
163 | void filter_free_preds(struct ftrace_event_call *call) | ||
164 | { | ||
165 | int i; | ||
166 | |||
167 | if (call->preds) { | ||
168 | for (i = 0; i < MAX_FILTER_PRED; i++) | ||
169 | filter_free_pred(call->preds[i]); | ||
170 | kfree(call->preds); | ||
171 | call->preds = NULL; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | void filter_free_subsystem_preds(struct event_subsystem *system) | ||
176 | { | ||
177 | struct ftrace_event_call *call = __start_ftrace_events; | ||
178 | int i; | ||
179 | |||
180 | if (system->preds) { | ||
181 | for (i = 0; i < MAX_FILTER_PRED; i++) | ||
182 | filter_free_pred(system->preds[i]); | ||
183 | kfree(system->preds); | ||
184 | system->preds = NULL; | ||
185 | } | ||
186 | |||
187 | events_for_each(call) { | ||
188 | if (!call->name || !call->regfunc) | ||
189 | continue; | ||
190 | |||
191 | if (!strcmp(call->system, system->name)) | ||
192 | filter_free_preds(call); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | static int __filter_add_pred(struct ftrace_event_call *call, | ||
197 | struct filter_pred *pred) | ||
198 | { | ||
199 | int i; | ||
200 | |||
201 | if (call->preds && !pred->compound) | ||
202 | filter_free_preds(call); | ||
203 | |||
204 | if (!call->preds) { | ||
205 | call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), | ||
206 | GFP_KERNEL); | ||
207 | if (!call->preds) | ||
208 | return -ENOMEM; | ||
209 | } | ||
210 | |||
211 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
212 | if (!call->preds[i]) { | ||
213 | call->preds[i] = pred; | ||
214 | return 0; | ||
215 | } | ||
216 | } | ||
217 | |||
218 | return -ENOMEM; | ||
219 | } | ||
220 | |||
/*
 * Return 1 if the type string @type contains both a '[' and the
 * substring "char" (e.g. "char[16]"), 0 otherwise.
 */
static int is_string_field(const char *type)
{
	return strchr(type, '[') != NULL && strstr(type, "char") != NULL;
}
228 | |||
229 | int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) | ||
230 | { | ||
231 | struct ftrace_event_field *field; | ||
232 | |||
233 | field = find_event_field(call, pred->field_name); | ||
234 | if (!field) | ||
235 | return -EINVAL; | ||
236 | |||
237 | pred->offset = field->offset; | ||
238 | |||
239 | if (is_string_field(field->type)) { | ||
240 | if (!pred->str_val) | ||
241 | return -EINVAL; | ||
242 | pred->fn = filter_pred_string; | ||
243 | pred->str_len = field->size; | ||
244 | return __filter_add_pred(call, pred); | ||
245 | } else { | ||
246 | if (pred->str_val) | ||
247 | return -EINVAL; | ||
248 | } | ||
249 | |||
250 | switch (field->size) { | ||
251 | case 8: | ||
252 | pred->fn = filter_pred_64; | ||
253 | break; | ||
254 | case 4: | ||
255 | pred->fn = filter_pred_32; | ||
256 | break; | ||
257 | case 2: | ||
258 | pred->fn = filter_pred_16; | ||
259 | break; | ||
260 | case 1: | ||
261 | pred->fn = filter_pred_8; | ||
262 | break; | ||
263 | default: | ||
264 | return -EINVAL; | ||
265 | } | ||
266 | |||
267 | return __filter_add_pred(call, pred); | ||
268 | } | ||
269 | |||
270 | static struct filter_pred *copy_pred(struct filter_pred *pred) | ||
271 | { | ||
272 | struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); | ||
273 | if (!new_pred) | ||
274 | return NULL; | ||
275 | |||
276 | memcpy(new_pred, pred, sizeof(*pred)); | ||
277 | |||
278 | if (pred->field_name) { | ||
279 | new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); | ||
280 | if (!new_pred->field_name) { | ||
281 | kfree(new_pred); | ||
282 | return NULL; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | if (pred->str_val) { | ||
287 | new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); | ||
288 | if (!new_pred->str_val) { | ||
289 | filter_free_pred(new_pred); | ||
290 | return NULL; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | return new_pred; | ||
295 | } | ||
296 | |||
297 | int filter_add_subsystem_pred(struct event_subsystem *system, | ||
298 | struct filter_pred *pred) | ||
299 | { | ||
300 | struct ftrace_event_call *call = __start_ftrace_events; | ||
301 | struct filter_pred *event_pred; | ||
302 | int i; | ||
303 | |||
304 | if (system->preds && !pred->compound) | ||
305 | filter_free_subsystem_preds(system); | ||
306 | |||
307 | if (!system->preds) { | ||
308 | system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), | ||
309 | GFP_KERNEL); | ||
310 | if (!system->preds) | ||
311 | return -ENOMEM; | ||
312 | } | ||
313 | |||
314 | for (i = 0; i < MAX_FILTER_PRED; i++) { | ||
315 | if (!system->preds[i]) { | ||
316 | system->preds[i] = pred; | ||
317 | break; | ||
318 | } | ||
319 | } | ||
320 | |||
321 | if (i == MAX_FILTER_PRED) | ||
322 | return -EINVAL; | ||
323 | |||
324 | events_for_each(call) { | ||
325 | int err; | ||
326 | |||
327 | if (!call->name || !call->regfunc) | ||
328 | continue; | ||
329 | |||
330 | if (strcmp(call->system, system->name)) | ||
331 | continue; | ||
332 | |||
333 | if (!find_event_field(call, pred->field_name)) | ||
334 | continue; | ||
335 | |||
336 | event_pred = copy_pred(pred); | ||
337 | if (!event_pred) | ||
338 | goto oom; | ||
339 | |||
340 | err = filter_add_pred(call, event_pred); | ||
341 | if (err) | ||
342 | filter_free_pred(event_pred); | ||
343 | if (err == -ENOMEM) | ||
344 | goto oom; | ||
345 | } | ||
346 | |||
347 | return 0; | ||
348 | |||
349 | oom: | ||
350 | system->preds[i] = NULL; | ||
351 | return -ENOMEM; | ||
352 | } | ||
353 | |||
354 | int filter_parse(char **pbuf, struct filter_pred *pred) | ||
355 | { | ||
356 | char *tmp, *tok, *val_str = NULL; | ||
357 | int tok_n = 0; | ||
358 | |||
359 | /* field ==/!= number, or/and field ==/!= number, number */ | ||
360 | while ((tok = strsep(pbuf, " \n"))) { | ||
361 | if (tok_n == 0) { | ||
362 | if (!strcmp(tok, "0")) { | ||
363 | pred->clear = 1; | ||
364 | return 0; | ||
365 | } else if (!strcmp(tok, "&&")) { | ||
366 | pred->or = 0; | ||
367 | pred->compound = 1; | ||
368 | } else if (!strcmp(tok, "||")) { | ||
369 | pred->or = 1; | ||
370 | pred->compound = 1; | ||
371 | } else | ||
372 | pred->field_name = tok; | ||
373 | tok_n = 1; | ||
374 | continue; | ||
375 | } | ||
376 | if (tok_n == 1) { | ||
377 | if (!pred->field_name) | ||
378 | pred->field_name = tok; | ||
379 | else if (!strcmp(tok, "!=")) | ||
380 | pred->not = 1; | ||
381 | else if (!strcmp(tok, "==")) | ||
382 | pred->not = 0; | ||
383 | else { | ||
384 | pred->field_name = NULL; | ||
385 | return -EINVAL; | ||
386 | } | ||
387 | tok_n = 2; | ||
388 | continue; | ||
389 | } | ||
390 | if (tok_n == 2) { | ||
391 | if (pred->compound) { | ||
392 | if (!strcmp(tok, "!=")) | ||
393 | pred->not = 1; | ||
394 | else if (!strcmp(tok, "==")) | ||
395 | pred->not = 0; | ||
396 | else { | ||
397 | pred->field_name = NULL; | ||
398 | return -EINVAL; | ||
399 | } | ||
400 | } else { | ||
401 | val_str = tok; | ||
402 | break; /* done */ | ||
403 | } | ||
404 | tok_n = 3; | ||
405 | continue; | ||
406 | } | ||
407 | if (tok_n == 3) { | ||
408 | val_str = tok; | ||
409 | break; /* done */ | ||
410 | } | ||
411 | } | ||
412 | |||
413 | pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); | ||
414 | if (!pred->field_name) | ||
415 | return -ENOMEM; | ||
416 | |||
417 | pred->val = simple_strtoull(val_str, &tmp, 10); | ||
418 | if (tmp == val_str) { | ||
419 | pred->str_val = kstrdup(val_str, GFP_KERNEL); | ||
420 | if (!pred->str_val) | ||
421 | return -ENOMEM; | ||
422 | } | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | |||
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 5117c43f5c67..30743f7d4110 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h | |||
@@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \ | |||
129 | } | 129 | } |
130 | 130 | ||
131 | #include <trace/trace_event_types.h> | 131 | #include <trace/trace_event_types.h> |
132 | |||
/*
 * Stage-2 redefinition of __field for ftrace_define_fields_<call>():
 * registers a scalar member of the raw event struct with
 * trace_define_field() and returns from the enclosing function on error.
 * Expands to bare statements that use the enclosing function's
 * 'event_call', 'field' and 'ret' locals.
 */
#undef __field
#define __field(type, item)						\
	ret = trace_define_field(event_call, #type, #item,		\
				 offsetof(typeof(field), item),		\
				 sizeof(field.item));			\
	if (ret)							\
		return ret;
140 | |||
/*
 * Stage-2 redefinition of __array for ftrace_define_fields_<call>():
 * registers an array member of the raw event struct; the recorded type
 * string is "<type>[<len>]".  Returns from the enclosing function on
 * error.  Uses the enclosing function's 'event_call', 'field' and 'ret'
 * locals.
 */
#undef __array
#define __array(type, item, len)					\
	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
				 offsetof(typeof(field), item),		\
				 sizeof(field.item));			\
	if (ret)							\
		return ret;
148 | |||
/*
 * Register a member of the record's 'ent' header under the name
 * "common_<item>".  The offset recorded is the member's offset within
 * 'ent' -- NOTE(review): this presumably relies on 'ent' being the first
 * member of the raw event struct; confirm against the struct definition.
 * Returns from the enclosing function on error.
 */
#define __common_field(type, item)					\
	ret = trace_define_field(event_call, #type, "common_" #item,	\
				 offsetof(typeof(field.ent), item),	\
				 sizeof(field.ent.item));		\
	if (ret)							\
		return ret;
155 | |||
/*
 * Stage-2 redefinition of TRACE_EVENT: emits ftrace_define_fields_<call>(),
 * which registers the five common header fields and then the event's own
 * fields (the expansion of 'tstruct') on event_<call>.
 *
 * The generated function returns 0 when every field was registered, or
 * the first error returned by trace_define_field().
 */
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
int									\
ftrace_define_fields_##call(void)					\
{									\
	struct ftrace_raw_##call field;					\
	struct ftrace_event_call *event_call = &event_##call;		\
	int ret;							\
									\
	__common_field(unsigned char, type);				\
	__common_field(unsigned char, flags);				\
	__common_field(unsigned char, preempt_count);			\
	__common_field(int, pid);					\
	__common_field(int, tgid);					\
									\
	tstruct;							\
									\
	return ret;							\
}
175 | |||
176 | #include <trace/trace_event_types.h> | ||
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 6b3261ca988c..9d2fa78cecca 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h | |||
@@ -204,6 +204,7 @@ static struct ftrace_event_call event_##call; \ | |||
204 | \ | 204 | \ |
205 | static void ftrace_raw_event_##call(proto) \ | 205 | static void ftrace_raw_event_##call(proto) \ |
206 | { \ | 206 | { \ |
207 | struct ftrace_event_call *call = &event_##call; \ | ||
207 | struct ring_buffer_event *event; \ | 208 | struct ring_buffer_event *event; \ |
208 | struct ftrace_raw_##call *entry; \ | 209 | struct ftrace_raw_##call *entry; \ |
209 | unsigned long irq_flags; \ | 210 | unsigned long irq_flags; \ |
@@ -221,7 +222,11 @@ static void ftrace_raw_event_##call(proto) \ | |||
221 | \ | 222 | \ |
222 | assign; \ | 223 | assign; \ |
223 | \ | 224 | \ |
224 | trace_current_buffer_unlock_commit(event, irq_flags, pc); \ | 225 | if (call->preds && !filter_match_preds(call, entry)) \ |
226 | ring_buffer_event_discard(event); \ | ||
227 | \ | ||
228 | trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ | ||
229 | \ | ||
225 | } \ | 230 | } \ |
226 | \ | 231 | \ |
227 | static int ftrace_raw_reg_event_##call(void) \ | 232 | static int ftrace_raw_reg_event_##call(void) \ |
@@ -252,6 +257,7 @@ static int ftrace_raw_init_event_##call(void) \ | |||
252 | if (!id) \ | 257 | if (!id) \ |
253 | return -ENODEV; \ | 258 | return -ENODEV; \ |
254 | event_##call.id = id; \ | 259 | event_##call.id = id; \ |
260 | INIT_LIST_HEAD(&event_##call.fields); \ | ||
255 | return 0; \ | 261 | return 0; \ |
256 | } \ | 262 | } \ |
257 | \ | 263 | \ |
@@ -264,6 +270,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
264 | .regfunc = ftrace_raw_reg_event_##call, \ | 270 | .regfunc = ftrace_raw_reg_event_##call, \ |
265 | .unregfunc = ftrace_raw_unreg_event_##call, \ | 271 | .unregfunc = ftrace_raw_unreg_event_##call, \ |
266 | .show_format = ftrace_format_##call, \ | 272 | .show_format = ftrace_format_##call, \ |
273 | .define_fields = ftrace_define_fields_##call, \ | ||
267 | _TRACE_PROFILE_INIT(call) \ | 274 | _TRACE_PROFILE_INIT(call) \ |
268 | } | 275 | } |
269 | 276 | ||
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e876816fa8e7..d28687e7b3a7 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -57,9 +57,9 @@ static struct tracer_flags tracer_flags = { | |||
57 | 57 | ||
58 | /* Add a function return address to the trace stack on thread info.*/ | 58 | /* Add a function return address to the trace stack on thread info.*/ |
59 | int | 59 | int |
60 | ftrace_push_return_trace(unsigned long ret, unsigned long long time, | 60 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) |
61 | unsigned long func, int *depth) | ||
62 | { | 61 | { |
62 | unsigned long long calltime; | ||
63 | int index; | 63 | int index; |
64 | 64 | ||
65 | if (!current->ret_stack) | 65 | if (!current->ret_stack) |
@@ -71,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time, | |||
71 | return -EBUSY; | 71 | return -EBUSY; |
72 | } | 72 | } |
73 | 73 | ||
74 | calltime = trace_clock_local(); | ||
75 | |||
74 | index = ++current->curr_ret_stack; | 76 | index = ++current->curr_ret_stack; |
75 | barrier(); | 77 | barrier(); |
76 | current->ret_stack[index].ret = ret; | 78 | current->ret_stack[index].ret = ret; |
77 | current->ret_stack[index].func = func; | 79 | current->ret_stack[index].func = func; |
78 | current->ret_stack[index].calltime = time; | 80 | current->ret_stack[index].calltime = calltime; |
79 | *depth = index; | 81 | *depth = index; |
80 | 82 | ||
81 | return 0; | 83 | return 0; |
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 9aa84bde23cd..394f94417e2f 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c | |||
@@ -91,6 +91,7 @@ struct tracer nop_trace __read_mostly = | |||
91 | .name = "nop", | 91 | .name = "nop", |
92 | .init = nop_trace_init, | 92 | .init = nop_trace_init, |
93 | .reset = nop_trace_reset, | 93 | .reset = nop_trace_reset, |
94 | .wait_pipe = poll_wait_pipe, | ||
94 | #ifdef CONFIG_FTRACE_SELFTEST | 95 | #ifdef CONFIG_FTRACE_SELFTEST |
95 | .selftest = trace_selftest_startup_nop, | 96 | .selftest = trace_selftest_startup_nop, |
96 | #endif | 97 | #endif |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 19261fdd2455..d72b9a63b247 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -137,7 +137,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) | |||
137 | return 1; | 137 | return 1; |
138 | } | 138 | } |
139 | 139 | ||
140 | int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) | 140 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) |
141 | { | 141 | { |
142 | if (len > ((PAGE_SIZE - 1) - s->len)) | 142 | if (len > ((PAGE_SIZE - 1) - s->len)) |
143 | return 0; | 143 | return 0; |
@@ -148,10 +148,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) | |||
148 | return len; | 148 | return len; |
149 | } | 149 | } |
150 | 150 | ||
151 | int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) | 151 | int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) |
152 | { | 152 | { |
153 | unsigned char hex[HEX_CHARS]; | 153 | unsigned char hex[HEX_CHARS]; |
154 | unsigned char *data = mem; | 154 | const unsigned char *data = mem; |
155 | int i, j; | 155 | int i, j; |
156 | 156 | ||
157 | #ifdef __BIG_ENDIAN | 157 | #ifdef __BIG_ENDIAN |
@@ -167,6 +167,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) | |||
167 | return trace_seq_putmem(s, hex, j); | 167 | return trace_seq_putmem(s, hex, j); |
168 | } | 168 | } |
169 | 169 | ||
170 | void *trace_seq_reserve(struct trace_seq *s, size_t len) | ||
171 | { | ||
172 | void *ret; | ||
173 | |||
174 | if (len > ((PAGE_SIZE - 1) - s->len)) | ||
175 | return NULL; | ||
176 | |||
177 | ret = s->buffer + s->len; | ||
178 | s->len += len; | ||
179 | |||
180 | return ret; | ||
181 | } | ||
182 | |||
170 | int trace_seq_path(struct trace_seq *s, struct path *path) | 183 | int trace_seq_path(struct trace_seq *s, struct path *path) |
171 | { | 184 | { |
172 | unsigned char *p; | 185 | unsigned char *p; |
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 35c422fb51a9..e0bde39c2dd9 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h | |||
@@ -29,24 +29,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, | |||
29 | unsigned long sym_flags); | 29 | unsigned long sym_flags); |
30 | extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, | 30 | extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, |
31 | size_t cnt); | 31 | size_t cnt); |
32 | int trace_seq_puts(struct trace_seq *s, const char *str); | 32 | extern int trace_seq_puts(struct trace_seq *s, const char *str); |
33 | int trace_seq_putc(struct trace_seq *s, unsigned char c); | 33 | extern int trace_seq_putc(struct trace_seq *s, unsigned char c); |
34 | int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); | 34 | extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); |
35 | int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); | 35 | extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, |
36 | int trace_seq_path(struct trace_seq *s, struct path *path); | 36 | size_t len); |
37 | int seq_print_userip_objs(const struct userstack_entry *entry, | 37 | extern void *trace_seq_reserve(struct trace_seq *s, size_t len); |
38 | struct trace_seq *s, unsigned long sym_flags); | 38 | extern int trace_seq_path(struct trace_seq *s, struct path *path); |
39 | int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | 39 | extern int seq_print_userip_objs(const struct userstack_entry *entry, |
40 | unsigned long ip, unsigned long sym_flags); | 40 | struct trace_seq *s, unsigned long sym_flags); |
41 | extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | ||
42 | unsigned long ip, unsigned long sym_flags); | ||
41 | 43 | ||
42 | int trace_print_context(struct trace_iterator *iter); | 44 | extern int trace_print_context(struct trace_iterator *iter); |
43 | int trace_print_lat_context(struct trace_iterator *iter); | 45 | extern int trace_print_lat_context(struct trace_iterator *iter); |
44 | 46 | ||
45 | struct trace_event *ftrace_find_event(int type); | 47 | extern struct trace_event *ftrace_find_event(int type); |
46 | int register_ftrace_event(struct trace_event *event); | 48 | extern int register_ftrace_event(struct trace_event *event); |
47 | int unregister_ftrace_event(struct trace_event *event); | 49 | extern int unregister_ftrace_event(struct trace_event *event); |
48 | 50 | ||
49 | enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); | 51 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, |
52 | int flags); | ||
50 | 53 | ||
51 | #define MAX_MEMHEX_BYTES 8 | 54 | #define MAX_MEMHEX_BYTES 8 |
52 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) | 55 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) |
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 39310e3434ee..acdebd771a93 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c | |||
@@ -75,7 +75,7 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
75 | { | 75 | { |
76 | struct trace_stat_list *iter_entry, *new_entry; | 76 | struct trace_stat_list *iter_entry, *new_entry; |
77 | struct tracer_stat *ts = session->ts; | 77 | struct tracer_stat *ts = session->ts; |
78 | void *prev_stat; | 78 | void *stat; |
79 | int ret = 0; | 79 | int ret = 0; |
80 | int i; | 80 | int i; |
81 | 81 | ||
@@ -85,6 +85,10 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
85 | if (!ts->stat_cmp) | 85 | if (!ts->stat_cmp) |
86 | ts->stat_cmp = dummy_cmp; | 86 | ts->stat_cmp = dummy_cmp; |
87 | 87 | ||
88 | stat = ts->stat_start(); | ||
89 | if (!stat) | ||
90 | goto exit; | ||
91 | |||
88 | /* | 92 | /* |
89 | * The first entry. Actually this is the second, but the first | 93 | * The first entry. Actually this is the second, but the first |
90 | * one (the stat_list head) is pointless. | 94 | * one (the stat_list head) is pointless. |
@@ -99,14 +103,19 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
99 | 103 | ||
100 | list_add(&new_entry->list, &session->stat_list); | 104 | list_add(&new_entry->list, &session->stat_list); |
101 | 105 | ||
102 | new_entry->stat = ts->stat_start(); | 106 | new_entry->stat = stat; |
103 | prev_stat = new_entry->stat; | ||
104 | 107 | ||
105 | /* | 108 | /* |
106 | * Iterate over the tracer stat entries and store them in a sorted | 109 | * Iterate over the tracer stat entries and store them in a sorted |
107 | * list. | 110 | * list. |
108 | */ | 111 | */ |
109 | for (i = 1; ; i++) { | 112 | for (i = 1; ; i++) { |
113 | stat = ts->stat_next(stat, i); | ||
114 | |||
115 | /* End of insertion */ | ||
116 | if (!stat) | ||
117 | break; | ||
118 | |||
110 | new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); | 119 | new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); |
111 | if (!new_entry) { | 120 | if (!new_entry) { |
112 | ret = -ENOMEM; | 121 | ret = -ENOMEM; |
@@ -114,31 +123,23 @@ static int stat_seq_init(struct tracer_stat_session *session) | |||
114 | } | 123 | } |
115 | 124 | ||
116 | INIT_LIST_HEAD(&new_entry->list); | 125 | INIT_LIST_HEAD(&new_entry->list); |
117 | new_entry->stat = ts->stat_next(prev_stat, i); | 126 | new_entry->stat = stat; |
118 | 127 | ||
119 | /* End of insertion */ | 128 | list_for_each_entry_reverse(iter_entry, &session->stat_list, |
120 | if (!new_entry->stat) | 129 | list) { |
121 | break; | ||
122 | |||
123 | list_for_each_entry(iter_entry, &session->stat_list, list) { | ||
124 | 130 | ||
125 | /* Insertion with a descendent sorting */ | 131 | /* Insertion with a descendent sorting */ |
126 | if (ts->stat_cmp(new_entry->stat, | 132 | if (ts->stat_cmp(iter_entry->stat, |
127 | iter_entry->stat) > 0) { | 133 | new_entry->stat) >= 0) { |
128 | |||
129 | list_add_tail(&new_entry->list, | ||
130 | &iter_entry->list); | ||
131 | break; | ||
132 | 134 | ||
133 | /* The current smaller value */ | ||
134 | } else if (list_is_last(&iter_entry->list, | ||
135 | &session->stat_list)) { | ||
136 | list_add(&new_entry->list, &iter_entry->list); | 135 | list_add(&new_entry->list, &iter_entry->list); |
137 | break; | 136 | break; |
138 | } | 137 | } |
139 | } | 138 | } |
140 | 139 | ||
141 | prev_stat = new_entry->stat; | 140 | /* The current larger value */ |
141 | if (list_empty(&new_entry->list)) | ||
142 | list_add(&new_entry->list, &session->stat_list); | ||
142 | } | 143 | } |
143 | exit: | 144 | exit: |
144 | mutex_unlock(&session->stat_mutex); | 145 | mutex_unlock(&session->stat_mutex); |
@@ -160,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos) | |||
160 | 161 | ||
161 | /* If we are in the beginning of the file, print the headers */ | 162 | /* If we are in the beginning of the file, print the headers */ |
162 | if (!*pos && session->ts->stat_headers) | 163 | if (!*pos && session->ts->stat_headers) |
163 | session->ts->stat_headers(s); | 164 | return SEQ_START_TOKEN; |
164 | 165 | ||
165 | return seq_list_start(&session->stat_list, *pos); | 166 | return seq_list_start(&session->stat_list, *pos); |
166 | } | 167 | } |
@@ -169,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) | |||
169 | { | 170 | { |
170 | struct tracer_stat_session *session = s->private; | 171 | struct tracer_stat_session *session = s->private; |
171 | 172 | ||
173 | if (p == SEQ_START_TOKEN) | ||
174 | return seq_list_start(&session->stat_list, *pos); | ||
175 | |||
172 | return seq_list_next(p, &session->stat_list, pos); | 176 | return seq_list_next(p, &session->stat_list, pos); |
173 | } | 177 | } |
174 | 178 | ||
@@ -183,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v) | |||
183 | struct tracer_stat_session *session = s->private; | 187 | struct tracer_stat_session *session = s->private; |
184 | struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); | 188 | struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); |
185 | 189 | ||
190 | if (v == SEQ_START_TOKEN) | ||
191 | return session->ts->stat_headers(s); | ||
192 | |||
186 | return session->ts->stat_show(s, l->stat); | 193 | return session->ts->stat_show(s, l->stat); |
187 | } | 194 | } |
188 | 195 | ||
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 9ab035b58cf1..797201e4a137 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c | |||
@@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p) | |||
196 | struct pid *pid; | 196 | struct pid *pid; |
197 | struct task_struct *tsk; | 197 | struct task_struct *tsk; |
198 | 198 | ||
199 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | ||
200 | if (&cws->list == workqueue_cpu_stat(cpu)->list.next) | ||
201 | seq_printf(s, "\n"); | ||
202 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); | ||
203 | |||
199 | pid = find_get_pid(cws->pid); | 204 | pid = find_get_pid(cws->pid); |
200 | if (pid) { | 205 | if (pid) { |
201 | tsk = get_pid_task(pid, PIDTYPE_PID); | 206 | tsk = get_pid_task(pid, PIDTYPE_PID); |
@@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p) | |||
208 | put_pid(pid); | 213 | put_pid(pid); |
209 | } | 214 | } |
210 | 215 | ||
211 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | ||
212 | if (&cws->list == workqueue_cpu_stat(cpu)->list.next) | ||
213 | seq_printf(s, "\n"); | ||
214 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); | ||
215 | |||
216 | return 0; | 216 | return 0; |
217 | } | 217 | } |
218 | 218 | ||
219 | static int workqueue_stat_headers(struct seq_file *s) | 219 | static int workqueue_stat_headers(struct seq_file *s) |
220 | { | 220 | { |
221 | seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); | 221 | seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); |
222 | seq_printf(s, "# | | | |\n\n"); | 222 | seq_printf(s, "# | | | |\n"); |
223 | return 0; | 223 | return 0; |
224 | } | 224 | } |
225 | 225 | ||