author    Ingo Molnar <mingo@elte.hu>    2009-04-07 07:47:33 -0400
committer Ingo Molnar <mingo@elte.hu>    2009-04-07 07:47:45 -0400
commit    93776a8ec746cf9d32c36e5a5b23d28d8be28826 (patch)
tree      6c472ae9f709246ee5268e1d71559d07839fb965 /kernel/trace
parent    34886c8bc590f078d4c0b88f50d061326639198d (diff)
parent    d508afb437daee7cf07da085b635c44a4ebf9b38 (diff)
Merge branch 'linus' into tracing/core
Merge reason: update to upstream tracing facilities

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                   9
-rw-r--r--  kernel/trace/Makefile                  1
-rw-r--r--  kernel/trace/blktrace.c              473
-rw-r--r--  kernel/trace/ftrace.c                 13
-rw-r--r--  kernel/trace/kmemtrace.c             319
-rw-r--r--  kernel/trace/ring_buffer.c           118
-rw-r--r--  kernel/trace/trace.c                  42
-rw-r--r--  kernel/trace/trace.h                  82
-rw-r--r--  kernel/trace/trace_events.c          203
-rw-r--r--  kernel/trace/trace_events_filter.c   427
-rw-r--r--  kernel/trace/trace_events_stage_2.h   45
-rw-r--r--  kernel/trace/trace_events_stage_3.h    9
-rw-r--r--  kernel/trace/trace_nop.c               1
-rw-r--r--  kernel/trace/trace_output.c           19
-rw-r--r--  kernel/trace/trace_output.h           33
-rw-r--r--  kernel/trace/trace_stat.c             26
-rw-r--r--  kernel/trace/trace_workqueue.c        12
17 files changed, 1408 insertions, 424 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8a4136096d7d..23b96ebbf893 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -99,11 +99,10 @@ config FUNCTION_GRAPH_TRACER
99 help 99 help
100 Enable the kernel to trace a function at both its return 100 Enable the kernel to trace a function at both its return
101 and its entry. 101 and its entry.
102 It's first purpose is to trace the duration of functions and 102 Its first purpose is to trace the duration of functions and
103 draw a call graph for each thread with some informations like 103 draw a call graph for each thread with some information like
104 the return value. 104 the return value. This is done by setting the current return
105 This is done by setting the current return address on the current 105 address on the current task structure into a stack of calls.
106 task structure into a stack of calls.
107 106
108 107
109config IRQSOFF_TRACER 108config IRQSOFF_TRACER
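
The help text above sums up how the graph tracer works: the entry hook saves the real return address and an entry timestamp on a per-task stack, and the exit hook pops that entry to compute the duration and restore the original return address. A minimal userspace sketch of that bookkeeping, with made-up names and sizes rather than the kernel's own structures:

#include <stdio.h>
#include <stdint.h>

#define RET_STACK_DEPTH 64

/* Per-task shadow stack of pending return addresses (illustrative only). */
struct ret_stack {
    uintptr_t ret[RET_STACK_DEPTH];
    uint64_t  entry_time[RET_STACK_DEPTH];
    int       depth;
};

/* Called on function entry: remember where to return and when we entered. */
static int push_return(struct ret_stack *s, uintptr_t ret, uint64_t now)
{
    if (s->depth >= RET_STACK_DEPTH)
        return -1;                      /* too deep: give up on tracing this call */
    s->ret[s->depth] = ret;
    s->entry_time[s->depth] = now;
    s->depth++;
    return 0;
}

/* Called on function exit: report the duration and hand back the address. */
static uintptr_t pop_return(struct ret_stack *s, uint64_t now, uint64_t *duration)
{
    s->depth--;
    *duration = now - s->entry_time[s->depth];
    return s->ret[s->depth];
}

int main(void)
{
    struct ret_stack s = { .depth = 0 };
    uint64_t d;

    push_return(&s, 0x1000, 100);       /* entry at t=100 */
    pop_return(&s, 142, &d);            /* exit at t=142 */
    printf("duration: %llu\n", (unsigned long long)d);
    return 0;
}
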
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 0e45c206c2f9..2630f5121ec1 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -45,5 +45,6 @@ obj-$(CONFIG_EVENT_TRACER) += events.o
45obj-$(CONFIG_EVENT_TRACER) += trace_export.o 45obj-$(CONFIG_EVENT_TRACER) += trace_export.o
46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
48obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
48 49
49libftrace-y := ftrace.o 50libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b171778e3863..947c5b3f90c4 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
30static unsigned int blktrace_seq __read_mostly = 1; 30static unsigned int blktrace_seq __read_mostly = 1;
31 31
32static struct trace_array *blk_tr; 32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled; 33static bool blk_tracer_enabled __read_mostly;
34 34
35/* Select an alternative, minimalistic output than the original one */ 35/* Select an alternative, minimalistic output than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1 36#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
47}; 47};
48 48
49/* Global reference count of probes */ 49/* Global reference count of probes */
50static DEFINE_MUTEX(blk_probe_mutex);
51static atomic_t blk_probes_ref = ATOMIC_INIT(0); 50static atomic_t blk_probes_ref = ATOMIC_INIT(0);
52 51
53static int blk_register_tracepoints(void); 52static void blk_register_tracepoints(void);
54static void blk_unregister_tracepoints(void); 53static void blk_unregister_tracepoints(void);
55 54
56/* 55/*
@@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
60 const void *data, size_t len) 59 const void *data, size_t len)
61{ 60{
62 struct blk_io_trace *t; 61 struct blk_io_trace *t;
62 struct ring_buffer_event *event = NULL;
63 int pc = 0;
64 int cpu = smp_processor_id();
65 bool blk_tracer = blk_tracer_enabled;
66
67 if (blk_tracer) {
68 pc = preempt_count();
69 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
70 sizeof(*t) + len,
71 0, pc);
72 if (!event)
73 return;
74 t = ring_buffer_event_data(event);
75 goto record_it;
76 }
63 77
64 if (!bt->rchan) 78 if (!bt->rchan)
65 return; 79 return;
66 80
67 t = relay_reserve(bt->rchan, sizeof(*t) + len); 81 t = relay_reserve(bt->rchan, sizeof(*t) + len);
68 if (t) { 82 if (t) {
69 const int cpu = smp_processor_id();
70
71 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 83 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
72 t->time = ktime_to_ns(ktime_get()); 84 t->time = ktime_to_ns(ktime_get());
85record_it:
73 t->device = bt->dev; 86 t->device = bt->dev;
74 t->action = action; 87 t->action = action;
75 t->pid = pid; 88 t->pid = pid;
76 t->cpu = cpu; 89 t->cpu = cpu;
77 t->pdu_len = len; 90 t->pdu_len = len;
78 memcpy((void *) t + sizeof(*t), data, len); 91 memcpy((void *) t + sizeof(*t), data, len);
92
93 if (blk_tracer)
94 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
79 } 95 }
80} 96}
81 97
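
The rewritten trace_note() above follows the ftrace reserve/fill/commit discipline when the in-kernel tracer is active: reserve space in the buffer, silently drop the note if no space is available, fill the payload in place, then commit it so readers can see it. A toy single-threaded sketch of that pattern (the buffer below is illustrative, not the kernel ring buffer API):

#include <stdio.h>
#include <string.h>
#include <stddef.h>

#define BUF_SIZE 4096

static unsigned char buf[BUF_SIZE];
static size_t committed;    /* bytes visible to readers */
static size_t reserved;     /* bytes handed out but not yet committed */

/* Hand out a slot; a NULL return means the caller simply drops the event. */
static void *buffer_reserve(size_t len)
{
    if (committed + len > BUF_SIZE)
        return NULL;
    reserved = len;
    return buf + committed;
}

/* Make the filled slot visible to readers. */
static void buffer_commit(void)
{
    committed += reserved;
    reserved = 0;
}

int main(void)
{
    const char msg[] = "hello";
    void *slot = buffer_reserve(sizeof(msg));

    if (!slot)
        return 1;                       /* reservation failed: nothing to undo */
    memcpy(slot, msg, sizeof(msg));
    buffer_commit();
    printf("committed %zu bytes\n", committed);
    return 0;
}
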
@@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
111 unsigned long flags; 127 unsigned long flags;
112 char *buf; 128 char *buf;
113 129
114 if (blk_tr) { 130 if (unlikely(bt->trace_state != Blktrace_running &&
115 va_start(args, fmt); 131 !blk_tracer_enabled))
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return; 132 return;
123 133
124 local_irq_save(flags); 134 local_irq_save(flags);
@@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
148/* 158/*
149 * Data direction bit lookup 159 * Data direction bit lookup
150 */ 160 */
151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), 161static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) }; 162 BLK_TC_ACT(BLK_TC_WRITE) };
153 163
154/* The ilog2() calls fall out because they're constant */ 164/* The ilog2() calls fall out because they're constant */
155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ 165#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
@@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
169 unsigned long *sequence; 179 unsigned long *sequence;
170 pid_t pid; 180 pid_t pid;
171 int cpu, pc = 0; 181 int cpu, pc = 0;
182 bool blk_tracer = blk_tracer_enabled;
172 183
173 if (unlikely(bt->trace_state != Blktrace_running || 184 if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
174 !blk_tracer_enabled))
175 return; 185 return;
176 186
177 what |= ddir_act[rw & WRITE]; 187 what |= ddir_act[rw & WRITE];
@@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
186 return; 196 return;
187 cpu = raw_smp_processor_id(); 197 cpu = raw_smp_processor_id();
188 198
189 if (blk_tr) { 199 if (blk_tracer) {
190 tracing_record_cmdline(current); 200 tracing_record_cmdline(current);
191 201
192 pc = preempt_count(); 202 pc = preempt_count();
@@ -236,7 +246,7 @@ record_it:
236 if (pdu_len) 246 if (pdu_len)
237 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 247 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
238 248
239 if (blk_tr) { 249 if (blk_tracer) {
240 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 250 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
241 return; 251 return;
242 } 252 }
@@ -248,7 +258,7 @@ record_it:
248static struct dentry *blk_tree_root; 258static struct dentry *blk_tree_root;
249static DEFINE_MUTEX(blk_tree_mutex); 259static DEFINE_MUTEX(blk_tree_mutex);
250 260
251static void blk_trace_cleanup(struct blk_trace *bt) 261static void blk_trace_free(struct blk_trace *bt)
252{ 262{
253 debugfs_remove(bt->msg_file); 263 debugfs_remove(bt->msg_file);
254 debugfs_remove(bt->dropped_file); 264 debugfs_remove(bt->dropped_file);
@@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt)
256 free_percpu(bt->sequence); 266 free_percpu(bt->sequence);
257 free_percpu(bt->msg_data); 267 free_percpu(bt->msg_data);
258 kfree(bt); 268 kfree(bt);
259 mutex_lock(&blk_probe_mutex); 269}
270
271static void blk_trace_cleanup(struct blk_trace *bt)
272{
273 blk_trace_free(bt);
260 if (atomic_dec_and_test(&blk_probes_ref)) 274 if (atomic_dec_and_test(&blk_probes_ref))
261 blk_unregister_tracepoints(); 275 blk_unregister_tracepoints();
262 mutex_unlock(&blk_probe_mutex);
263} 276}
264 277
265int blk_trace_remove(struct request_queue *q) 278int blk_trace_remove(struct request_queue *q)
@@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q)
270 if (!bt) 283 if (!bt)
271 return -EINVAL; 284 return -EINVAL;
272 285
273 if (bt->trace_state == Blktrace_setup || 286 if (bt->trace_state != Blktrace_running)
274 bt->trace_state == Blktrace_stopped)
275 blk_trace_cleanup(bt); 287 blk_trace_cleanup(bt);
276 288
277 return 0; 289 return 0;
@@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
414 if (buts->name[i] == '/') 426 if (buts->name[i] == '/')
415 buts->name[i] = '_'; 427 buts->name[i] = '_';
416 428
417 ret = -ENOMEM;
418 bt = kzalloc(sizeof(*bt), GFP_KERNEL); 429 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
419 if (!bt) 430 if (!bt)
420 goto err; 431 return -ENOMEM;
421 432
433 ret = -ENOMEM;
422 bt->sequence = alloc_percpu(unsigned long); 434 bt->sequence = alloc_percpu(unsigned long);
423 if (!bt->sequence) 435 if (!bt->sequence)
424 goto err; 436 goto err;
@@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
429 441
430 ret = -ENOENT; 442 ret = -ENOENT;
431 443
444 mutex_lock(&blk_tree_mutex);
432 if (!blk_tree_root) { 445 if (!blk_tree_root) {
433 blk_tree_root = debugfs_create_dir("block", NULL); 446 blk_tree_root = debugfs_create_dir("block", NULL);
434 if (!blk_tree_root) 447 if (!blk_tree_root) {
435 return -ENOMEM; 448 mutex_unlock(&blk_tree_mutex);
449 goto err;
450 }
436 } 451 }
452 mutex_unlock(&blk_tree_mutex);
437 453
438 dir = debugfs_create_dir(buts->name, blk_tree_root); 454 dir = debugfs_create_dir(buts->name, blk_tree_root);
439 455
@@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
471 bt->pid = buts->pid; 487 bt->pid = buts->pid;
472 bt->trace_state = Blktrace_setup; 488 bt->trace_state = Blktrace_setup;
473 489
474 mutex_lock(&blk_probe_mutex);
475 if (atomic_add_return(1, &blk_probes_ref) == 1) {
476 ret = blk_register_tracepoints();
477 if (ret)
478 goto probe_err;
479 }
480 mutex_unlock(&blk_probe_mutex);
481
482 ret = -EBUSY; 490 ret = -EBUSY;
483 old_bt = xchg(&q->blk_trace, bt); 491 old_bt = xchg(&q->blk_trace, bt);
484 if (old_bt) { 492 if (old_bt) {
@@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
486 goto err; 494 goto err;
487 } 495 }
488 496
497 if (atomic_inc_return(&blk_probes_ref) == 1)
498 blk_register_tracepoints();
499
489 return 0; 500 return 0;
490probe_err:
491 atomic_dec(&blk_probes_ref);
492 mutex_unlock(&blk_probe_mutex);
493err: 501err:
494 if (bt) { 502 blk_trace_free(bt);
495 if (bt->msg_file)
496 debugfs_remove(bt->msg_file);
497 if (bt->dropped_file)
498 debugfs_remove(bt->dropped_file);
499 free_percpu(bt->sequence);
500 free_percpu(bt->msg_data);
501 if (bt->rchan)
502 relay_close(bt->rchan);
503 kfree(bt);
504 }
505 return ret; 503 return ret;
506} 504}
507 505
@@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q,
863} 861}
864EXPORT_SYMBOL_GPL(blk_add_driver_data); 862EXPORT_SYMBOL_GPL(blk_add_driver_data);
865 863
866static int blk_register_tracepoints(void) 864static void blk_register_tracepoints(void)
867{ 865{
868 int ret; 866 int ret;
869 867
@@ -901,7 +899,6 @@ static int blk_register_tracepoints(void)
901 WARN_ON(ret); 899 WARN_ON(ret);
902 ret = register_trace_block_remap(blk_add_trace_remap); 900 ret = register_trace_block_remap(blk_add_trace_remap);
903 WARN_ON(ret); 901 WARN_ON(ret);
904 return 0;
905} 902}
906 903
907static void blk_unregister_tracepoints(void) 904static void blk_unregister_tracepoints(void)
@@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void)
934static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) 931static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
935{ 932{
936 int i = 0; 933 int i = 0;
934 int tc = t->action >> BLK_TC_SHIFT;
935
936 if (t->action == BLK_TN_MESSAGE) {
937 rwbs[i++] = 'N';
938 goto out;
939 }
937 940
938 if (t->action & BLK_TC_DISCARD) 941 if (tc & BLK_TC_DISCARD)
939 rwbs[i++] = 'D'; 942 rwbs[i++] = 'D';
940 else if (t->action & BLK_TC_WRITE) 943 else if (tc & BLK_TC_WRITE)
941 rwbs[i++] = 'W'; 944 rwbs[i++] = 'W';
942 else if (t->bytes) 945 else if (t->bytes)
943 rwbs[i++] = 'R'; 946 rwbs[i++] = 'R';
944 else 947 else
945 rwbs[i++] = 'N'; 948 rwbs[i++] = 'N';
946 949
947 if (t->action & BLK_TC_AHEAD) 950 if (tc & BLK_TC_AHEAD)
948 rwbs[i++] = 'A'; 951 rwbs[i++] = 'A';
949 if (t->action & BLK_TC_BARRIER) 952 if (tc & BLK_TC_BARRIER)
950 rwbs[i++] = 'B'; 953 rwbs[i++] = 'B';
951 if (t->action & BLK_TC_SYNC) 954 if (tc & BLK_TC_SYNC)
952 rwbs[i++] = 'S'; 955 rwbs[i++] = 'S';
953 if (t->action & BLK_TC_META) 956 if (tc & BLK_TC_META)
954 rwbs[i++] = 'M'; 957 rwbs[i++] = 'M';
955 958out:
956 rwbs[i] = '\0'; 959 rwbs[i] = '\0';
957} 960}
958 961
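
fill_rwbs() above now tests the category bits (t->action >> BLK_TC_SHIFT) rather than the raw action word, and short-circuits to "N" for BLK_TN_MESSAGE records. A standalone sketch of the string-building part, using placeholder bit values instead of the real BLK_TC_* constants:

#include <stdio.h>

/* Placeholder category bits -- the real BLK_TC_* values live in blktrace_api.h. */
#define TC_READ    (1 << 0)
#define TC_WRITE   (1 << 1)
#define TC_BARRIER (1 << 2)
#define TC_SYNC    (1 << 3)
#define TC_AHEAD   (1 << 4)
#define TC_META    (1 << 5)
#define TC_DISCARD (1 << 6)

static void fill_rwbs(char *rwbs, unsigned tc, unsigned bytes)
{
    int i = 0;

    /* Primary direction: discard wins over write, write over read. */
    if (tc & TC_DISCARD)
        rwbs[i++] = 'D';
    else if (tc & TC_WRITE)
        rwbs[i++] = 'W';
    else if (bytes)
        rwbs[i++] = 'R';
    else
        rwbs[i++] = 'N';

    /* Modifiers are appended in a fixed order. */
    if (tc & TC_AHEAD)
        rwbs[i++] = 'A';
    if (tc & TC_BARRIER)
        rwbs[i++] = 'B';
    if (tc & TC_SYNC)
        rwbs[i++] = 'S';
    if (tc & TC_META)
        rwbs[i++] = 'M';

    rwbs[i] = '\0';
}

int main(void)
{
    char rwbs[8];

    fill_rwbs(rwbs, TC_WRITE | TC_SYNC, 4096);
    printf("%s\n", rwbs);    /* prints "WS" */
    return 0;
}
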
@@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent)
979 982
980static inline __u16 t_error(const struct trace_entry *ent) 983static inline __u16 t_error(const struct trace_entry *ent)
981{ 984{
982 return te_blk_io_trace(ent)->sector; 985 return te_blk_io_trace(ent)->error;
983} 986}
984 987
985static __u64 get_pdu_int(const struct trace_entry *ent) 988static __u64 get_pdu_int(const struct trace_entry *ent)
@@ -999,29 +1002,31 @@ static void get_pdu_remap(const struct trace_entry *ent,
999 r->sector = be64_to_cpu(sector); 1002 r->sector = be64_to_cpu(sector);
1000} 1003}
1001 1004
1002static int blk_log_action_iter(struct trace_iterator *iter, const char *act) 1005typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
1006
1007static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1003{ 1008{
1004 char rwbs[6]; 1009 char rwbs[6];
1005 unsigned long long ts = ns2usecs(iter->ts); 1010 unsigned long long ts = iter->ts;
1006 unsigned long usec_rem = do_div(ts, USEC_PER_SEC); 1011 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
1007 unsigned secs = (unsigned long)ts; 1012 unsigned secs = (unsigned long)ts;
1008 const struct trace_entry *ent = iter->ent; 1013 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1009 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1010 1014
1011 fill_rwbs(rwbs, t); 1015 fill_rwbs(rwbs, t);
1012 1016
1013 return trace_seq_printf(&iter->seq, 1017 return trace_seq_printf(&iter->seq,
1014 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", 1018 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
1015 MAJOR(t->device), MINOR(t->device), iter->cpu, 1019 MAJOR(t->device), MINOR(t->device), iter->cpu,
1016 secs, usec_rem, ent->pid, act, rwbs); 1020 secs, nsec_rem, iter->ent->pid, act, rwbs);
1017} 1021}
1018 1022
1019static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, 1023static int blk_log_action(struct trace_iterator *iter, const char *act)
1020 const char *act)
1021{ 1024{
1022 char rwbs[6]; 1025 char rwbs[6];
1026 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1027
1023 fill_rwbs(rwbs, t); 1028 fill_rwbs(rwbs, t);
1024 return trace_seq_printf(s, "%3d,%-3d %2s %3s ", 1029 return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
1025 MAJOR(t->device), MINOR(t->device), act, rwbs); 1030 MAJOR(t->device), MINOR(t->device), act, rwbs);
1026} 1031}
1027 1032
@@ -1085,6 +1090,17 @@ static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1085 get_pdu_int(ent), cmd); 1090 get_pdu_int(ent), cmd);
1086} 1091}
1087 1092
1093static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
1094{
1095 int ret;
1096 const struct blk_io_trace *t = te_blk_io_trace(ent);
1097
1098 ret = trace_seq_putmem(s, t + 1, t->pdu_len);
1099 if (ret)
1100 return trace_seq_putc(s, '\n');
1101 return ret;
1102}
1103
1088/* 1104/*
1089 * struct tracer operations 1105 * struct tracer operations
1090 */ 1106 */
@@ -1099,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m)
1099 1115
1100static void blk_tracer_start(struct trace_array *tr) 1116static void blk_tracer_start(struct trace_array *tr)
1101{ 1117{
1102 mutex_lock(&blk_probe_mutex); 1118 blk_tracer_enabled = true;
1103 if (atomic_add_return(1, &blk_probes_ref) == 1)
1104 if (blk_register_tracepoints())
1105 atomic_dec(&blk_probes_ref);
1106 mutex_unlock(&blk_probe_mutex);
1107 trace_flags &= ~TRACE_ITER_CONTEXT_INFO; 1119 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1108} 1120}
1109 1121
@@ -1111,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr)
1111{ 1123{
1112 blk_tr = tr; 1124 blk_tr = tr;
1113 blk_tracer_start(tr); 1125 blk_tracer_start(tr);
1114 mutex_lock(&blk_probe_mutex);
1115 blk_tracer_enabled++;
1116 mutex_unlock(&blk_probe_mutex);
1117 return 0; 1126 return 0;
1118} 1127}
1119 1128
1120static void blk_tracer_stop(struct trace_array *tr) 1129static void blk_tracer_stop(struct trace_array *tr)
1121{ 1130{
1131 blk_tracer_enabled = false;
1122 trace_flags |= TRACE_ITER_CONTEXT_INFO; 1132 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1123 mutex_lock(&blk_probe_mutex);
1124 if (atomic_dec_and_test(&blk_probes_ref))
1125 blk_unregister_tracepoints();
1126 mutex_unlock(&blk_probe_mutex);
1127} 1133}
1128 1134
1129static void blk_tracer_reset(struct trace_array *tr) 1135static void blk_tracer_reset(struct trace_array *tr)
1130{ 1136{
1131 if (!atomic_read(&blk_probes_ref))
1132 return;
1133
1134 mutex_lock(&blk_probe_mutex);
1135 blk_tracer_enabled--;
1136 WARN_ON(blk_tracer_enabled < 0);
1137 mutex_unlock(&blk_probe_mutex);
1138
1139 blk_tracer_stop(tr); 1137 blk_tracer_stop(tr);
1140} 1138}
1141 1139
1142static struct { 1140static const struct {
1143 const char *act[2]; 1141 const char *act[2];
1144 int (*print)(struct trace_seq *s, const struct trace_entry *ent); 1142 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1145} what2act[] __read_mostly = { 1143} what2act[] = {
1146 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, 1144 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1147 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, 1145 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1148 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, 1146 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
@@ -1160,29 +1158,48 @@ static struct {
1160 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, 1158 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1161}; 1159};
1162 1160
1163static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1161static enum print_line_t print_one_line(struct trace_iterator *iter,
1164 int flags) 1162 bool classic)
1165{ 1163{
1166 struct trace_seq *s = &iter->seq; 1164 struct trace_seq *s = &iter->seq;
1167 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; 1165 const struct blk_io_trace *t;
1168 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); 1166 u16 what;
1169 int ret; 1167 int ret;
1168 bool long_act;
1169 blk_log_action_t *log_action;
1170 1170
1171 if (!trace_print_context(iter)) 1171 t = te_blk_io_trace(iter->ent);
1172 return TRACE_TYPE_PARTIAL_LINE; 1172 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1173 long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1174 log_action = classic ? &blk_log_action_classic : &blk_log_action;
1173 1175
1174 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) 1176 if (t->action == BLK_TN_MESSAGE) {
1177 ret = log_action(iter, long_act ? "message" : "m");
1178 if (ret)
1179 ret = blk_log_msg(s, iter->ent);
1180 goto out;
1181 }
1182
1183 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
1175 ret = trace_seq_printf(s, "Bad pc action %x\n", what); 1184 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1176 else { 1185 else {
1177 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); 1186 ret = log_action(iter, what2act[what].act[long_act]);
1178 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1179 if (ret) 1187 if (ret)
1180 ret = what2act[what].print(s, iter->ent); 1188 ret = what2act[what].print(s, iter->ent);
1181 } 1189 }
1182 1190out:
1183 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1191 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1184} 1192}
1185 1193
1194static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1195 int flags)
1196{
1197 if (!trace_print_context(iter))
1198 return TRACE_TYPE_PARTIAL_LINE;
1199
1200 return print_one_line(iter, false);
1201}
1202
1186static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) 1203static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1187{ 1204{
1188 struct trace_seq *s = &iter->seq; 1205 struct trace_seq *s = &iter->seq;
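
print_one_line() above funnels both the classic and the default output paths through the what2act[] dispatch table: the action code indexes an entry holding a short name, a long name and a print handler, and a bounds check guards the sparse table. A compact userspace sketch of the same table-driven dispatch (the event layout and handlers are invented for illustration):

#include <stdio.h>

struct event { unsigned what; int verbose; };

static int print_generic(const struct event *e) { return printf("generic\n"); }
static int print_remap(const struct event *e)   { return printf("remap\n"); }

static const struct {
    const char *act[2];                      /* short and long action names */
    int (*print)(const struct event *e);     /* per-action formatter */
} what2act[] = {
    [1] = {{ "Q", "queue" }, print_generic },
    [2] = {{ "A", "remap" }, print_remap },
};

static int print_one_line(const struct event *e)
{
    /* Reject codes that fall outside the (sparse) dispatch table. */
    if (e->what == 0 || e->what >= sizeof(what2act) / sizeof(what2act[0]))
        return printf("Bad action %x\n", e->what);

    printf("%s ", what2act[e->what].act[!!e->verbose]);
    return what2act[e->what].print(e);
}

int main(void)
{
    struct event e = { .what = 2, .verbose = 1 };

    print_one_line(&e);    /* prints "remap remap" */
    return 0;
}
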
@@ -1190,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1190 const int offset = offsetof(struct blk_io_trace, sector); 1207 const int offset = offsetof(struct blk_io_trace, sector);
1191 struct blk_io_trace old = { 1208 struct blk_io_trace old = {
1192 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, 1209 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1193 .time = ns2usecs(iter->ts), 1210 .time = iter->ts,
1194 }; 1211 };
1195 1212
1196 if (!trace_seq_putmem(s, &old, offset)) 1213 if (!trace_seq_putmem(s, &old, offset))
@@ -1208,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1208 1225
1209static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) 1226static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1210{ 1227{
1211 const struct blk_io_trace *t;
1212 u16 what;
1213 int ret;
1214
1215 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) 1228 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1216 return TRACE_TYPE_UNHANDLED; 1229 return TRACE_TYPE_UNHANDLED;
1217 1230
1218 t = (const struct blk_io_trace *)iter->ent; 1231 return print_one_line(iter, true);
1219 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1220
1221 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1222 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1223 else {
1224 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1225 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1226 if (ret)
1227 ret = what2act[what].print(&iter->seq, iter->ent);
1228 }
1229
1230 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1231} 1232}
1232 1233
1233static struct tracer blk_tracer __read_mostly = { 1234static struct tracer blk_tracer __read_mostly = {
@@ -1273,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
1273 if (bt == NULL) 1274 if (bt == NULL)
1274 return -EINVAL; 1275 return -EINVAL;
1275 1276
1276 kfree(bt); 1277 if (atomic_dec_and_test(&blk_probes_ref))
1278 blk_unregister_tracepoints();
1279
1280 blk_trace_free(bt);
1277 return 0; 1281 return 0;
1278} 1282}
1279 1283
@@ -1283,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q)
1283static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) 1287static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1284{ 1288{
1285 struct blk_trace *old_bt, *bt = NULL; 1289 struct blk_trace *old_bt, *bt = NULL;
1286 int ret; 1290 int ret = -ENOMEM;
1287 1291
1288 ret = -ENOMEM;
1289 bt = kzalloc(sizeof(*bt), GFP_KERNEL); 1292 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1290 if (!bt) 1293 if (!bt)
1291 goto err; 1294 return -ENOMEM;
1295
1296 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
1297 if (!bt->msg_data)
1298 goto free_bt;
1292 1299
1293 bt->dev = dev; 1300 bt->dev = dev;
1294 bt->act_mask = (u16)-1; 1301 bt->act_mask = (u16)-1;
1295 bt->end_lba = -1ULL; 1302 bt->end_lba = -1ULL;
1296 bt->trace_state = Blktrace_running;
1297 1303
1298 old_bt = xchg(&q->blk_trace, bt); 1304 old_bt = xchg(&q->blk_trace, bt);
1299 if (old_bt != NULL) { 1305 if (old_bt != NULL) {
1300 (void)xchg(&q->blk_trace, old_bt); 1306 (void)xchg(&q->blk_trace, old_bt);
1301 kfree(bt);
1302 ret = -EBUSY; 1307 ret = -EBUSY;
1308 goto free_bt;
1303 } 1309 }
1310
1311 if (atomic_inc_return(&blk_probes_ref) == 1)
1312 blk_register_tracepoints();
1304 return 0; 1313 return 0;
1305err: 1314
1315free_bt:
1316 blk_trace_free(bt);
1306 return ret; 1317 return ret;
1307} 1318}
1308 1319
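
blk_trace_setup_queue() above installs the new bt with xchg() and backs out with -EBUSY if another tracer was already attached, bumping blk_probes_ref only once the install has succeeded. The same install-once idiom expressed with C11 atomics (a userspace sketch, not the kernel's xchg()):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

struct blk_trace { int dummy; };

static _Atomic(struct blk_trace *) q_blk_trace;

static int install_trace(struct blk_trace *bt)
{
    /* Swap our pointer in; if someone beat us to it, put theirs back. */
    struct blk_trace *old = atomic_exchange(&q_blk_trace, bt);

    if (old) {
        atomic_exchange(&q_blk_trace, old);
        return -EBUSY;
    }
    return 0;
}

int main(void)
{
    struct blk_trace *a = calloc(1, sizeof(*a));
    struct blk_trace *b = calloc(1, sizeof(*b));

    printf("first:  %d\n", install_trace(a));    /* 0 */
    printf("second: %d\n", install_trace(b));    /* -EBUSY */

    free(a);
    free(b);
    return 0;
}

A compare-and-swap (atomic_compare_exchange_strong) would avoid the brief window in which the pointer names the losing tracer; the double exchange above simply mirrors the xchg()-based code in the hunk.
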
@@ -1310,72 +1321,6 @@ err:
1310 * sysfs interface to enable and configure tracing 1321 * sysfs interface to enable and configure tracing
1311 */ 1322 */
1312 1323
1313static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1314 struct device_attribute *attr,
1315 char *buf)
1316{
1317 struct hd_struct *p = dev_to_part(dev);
1318 struct block_device *bdev;
1319 ssize_t ret = -ENXIO;
1320
1321 lock_kernel();
1322 bdev = bdget(part_devt(p));
1323 if (bdev != NULL) {
1324 struct request_queue *q = bdev_get_queue(bdev);
1325
1326 if (q != NULL) {
1327 mutex_lock(&bdev->bd_mutex);
1328 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1329 mutex_unlock(&bdev->bd_mutex);
1330 }
1331
1332 bdput(bdev);
1333 }
1334
1335 unlock_kernel();
1336 return ret;
1337}
1338
1339static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1340 struct device_attribute *attr,
1341 const char *buf, size_t count)
1342{
1343 struct block_device *bdev;
1344 struct request_queue *q;
1345 struct hd_struct *p;
1346 int value;
1347 ssize_t ret = -ENXIO;
1348
1349 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1350 goto out;
1351
1352 lock_kernel();
1353 p = dev_to_part(dev);
1354 bdev = bdget(part_devt(p));
1355 if (bdev == NULL)
1356 goto out_unlock_kernel;
1357
1358 q = bdev_get_queue(bdev);
1359 if (q == NULL)
1360 goto out_bdput;
1361
1362 mutex_lock(&bdev->bd_mutex);
1363 if (value)
1364 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1365 else
1366 ret = blk_trace_remove_queue(q);
1367 mutex_unlock(&bdev->bd_mutex);
1368
1369 if (ret == 0)
1370 ret = count;
1371out_bdput:
1372 bdput(bdev);
1373out_unlock_kernel:
1374 unlock_kernel();
1375out:
1376 return ret;
1377}
1378
1379static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1324static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1380 struct device_attribute *attr, 1325 struct device_attribute *attr,
1381 char *buf); 1326 char *buf);
@@ -1387,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1387 sysfs_blk_trace_attr_show, \ 1332 sysfs_blk_trace_attr_show, \
1388 sysfs_blk_trace_attr_store) 1333 sysfs_blk_trace_attr_store)
1389 1334
1390static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, 1335static BLK_TRACE_DEVICE_ATTR(enable);
1391 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1392static BLK_TRACE_DEVICE_ATTR(act_mask); 1336static BLK_TRACE_DEVICE_ATTR(act_mask);
1393static BLK_TRACE_DEVICE_ATTR(pid); 1337static BLK_TRACE_DEVICE_ATTR(pid);
1394static BLK_TRACE_DEVICE_ATTR(start_lba); 1338static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1408,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = {
1408 .attrs = blk_trace_attrs, 1352 .attrs = blk_trace_attrs,
1409}; 1353};
1410 1354
1411static int blk_str2act_mask(const char *str) 1355static const struct {
1356 int mask;
1357 const char *str;
1358} mask_maps[] = {
1359 { BLK_TC_READ, "read" },
1360 { BLK_TC_WRITE, "write" },
1361 { BLK_TC_BARRIER, "barrier" },
1362 { BLK_TC_SYNC, "sync" },
1363 { BLK_TC_QUEUE, "queue" },
1364 { BLK_TC_REQUEUE, "requeue" },
1365 { BLK_TC_ISSUE, "issue" },
1366 { BLK_TC_COMPLETE, "complete" },
1367 { BLK_TC_FS, "fs" },
1368 { BLK_TC_PC, "pc" },
1369 { BLK_TC_AHEAD, "ahead" },
1370 { BLK_TC_META, "meta" },
1371 { BLK_TC_DISCARD, "discard" },
1372 { BLK_TC_DRV_DATA, "drv_data" },
1373};
1374
1375static int blk_trace_str2mask(const char *str)
1412{ 1376{
1377 int i;
1413 int mask = 0; 1378 int mask = 0;
1414 char *copy = kstrdup(str, GFP_KERNEL), *s; 1379 char *s, *token;
1415 1380
1416 if (copy == NULL) 1381 s = kstrdup(str, GFP_KERNEL);
1382 if (s == NULL)
1417 return -ENOMEM; 1383 return -ENOMEM;
1418 1384 s = strstrip(s);
1419 s = strstrip(copy);
1420 1385
1421 while (1) { 1386 while (1) {
1422 char *sep = strchr(s, ','); 1387 token = strsep(&s, ",");
1423 1388 if (token == NULL)
1424 if (sep != NULL)
1425 *sep = '\0';
1426
1427 if (strcasecmp(s, "barrier") == 0)
1428 mask |= BLK_TC_BARRIER;
1429 else if (strcasecmp(s, "complete") == 0)
1430 mask |= BLK_TC_COMPLETE;
1431 else if (strcasecmp(s, "fs") == 0)
1432 mask |= BLK_TC_FS;
1433 else if (strcasecmp(s, "issue") == 0)
1434 mask |= BLK_TC_ISSUE;
1435 else if (strcasecmp(s, "pc") == 0)
1436 mask |= BLK_TC_PC;
1437 else if (strcasecmp(s, "queue") == 0)
1438 mask |= BLK_TC_QUEUE;
1439 else if (strcasecmp(s, "read") == 0)
1440 mask |= BLK_TC_READ;
1441 else if (strcasecmp(s, "requeue") == 0)
1442 mask |= BLK_TC_REQUEUE;
1443 else if (strcasecmp(s, "sync") == 0)
1444 mask |= BLK_TC_SYNC;
1445 else if (strcasecmp(s, "write") == 0)
1446 mask |= BLK_TC_WRITE;
1447
1448 if (sep == NULL)
1449 break; 1389 break;
1450 1390
1451 s = sep + 1; 1391 if (*token == '\0')
1392 continue;
1393
1394 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
1395 if (strcasecmp(token, mask_maps[i].str) == 0) {
1396 mask |= mask_maps[i].mask;
1397 break;
1398 }
1399 }
1400 if (i == ARRAY_SIZE(mask_maps)) {
1401 mask = -EINVAL;
1402 break;
1403 }
1452 } 1404 }
1453 kfree(copy); 1405 kfree(s);
1454 1406
1455 return mask; 1407 return mask;
1456} 1408}
1457 1409
1410static ssize_t blk_trace_mask2str(char *buf, int mask)
1411{
1412 int i;
1413 char *p = buf;
1414
1415 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
1416 if (mask & mask_maps[i].mask) {
1417 p += sprintf(p, "%s%s",
1418 (p == buf) ? "" : ",", mask_maps[i].str);
1419 }
1420 }
1421 *p++ = '\n';
1422
1423 return p - buf;
1424}
1425
1426static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
1427{
1428 if (bdev->bd_disk == NULL)
1429 return NULL;
1430
1431 return bdev_get_queue(bdev);
1432}
1433
1458static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1434static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1459 struct device_attribute *attr, 1435 struct device_attribute *attr,
1460 char *buf) 1436 char *buf)
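
The act_mask handling above becomes table-driven: blk_trace_str2mask() splits the comma-separated list with strsep(), matches each token against mask_maps[] case-insensitively, and rejects unknown names with -EINVAL, while blk_trace_mask2str() walks the same table in reverse for reads. A self-contained sketch of the parsing side (the bit values are placeholders, not the kernel's BLK_TC_* masks):

#define _GNU_SOURCE          /* strsep(), strdup() on glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>

static const struct {
    int mask;
    const char *str;
} mask_maps[] = {
    { 1 << 0, "read"    },
    { 1 << 1, "write"   },
    { 1 << 2, "barrier" },
    { 1 << 3, "sync"    },
    { 1 << 4, "queue"   },
};

static int str2mask(const char *str)
{
    int mask = 0;
    char *copy, *s, *token;
    size_t i;

    copy = strdup(str);
    if (!copy)
        return -ENOMEM;
    s = copy;

    while ((token = strsep(&s, ",")) != NULL) {
        if (*token == '\0')
            continue;                        /* tolerate empty fields: "a,,b" */
        for (i = 0; i < sizeof(mask_maps) / sizeof(mask_maps[0]); i++) {
            if (strcasecmp(token, mask_maps[i].str) == 0) {
                mask |= mask_maps[i].mask;
                break;
            }
        }
        if (i == sizeof(mask_maps) / sizeof(mask_maps[0])) {
            mask = -EINVAL;                  /* unknown category name */
            break;
        }
    }

    free(copy);
    return mask;
}

int main(void)
{
    printf("%#x\n", str2mask("read,write,sync"));    /* 0xb */
    printf("%d\n",  str2mask("read,bogus"));         /* -EINVAL */
    return 0;
}
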
@@ -1469,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1469 if (bdev == NULL) 1445 if (bdev == NULL)
1470 goto out_unlock_kernel; 1446 goto out_unlock_kernel;
1471 1447
1472 q = bdev_get_queue(bdev); 1448 q = blk_trace_get_queue(bdev);
1473 if (q == NULL) 1449 if (q == NULL)
1474 goto out_bdput; 1450 goto out_bdput;
1451
1475 mutex_lock(&bdev->bd_mutex); 1452 mutex_lock(&bdev->bd_mutex);
1453
1454 if (attr == &dev_attr_enable) {
1455 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1456 goto out_unlock_bdev;
1457 }
1458
1476 if (q->blk_trace == NULL) 1459 if (q->blk_trace == NULL)
1477 ret = sprintf(buf, "disabled\n"); 1460 ret = sprintf(buf, "disabled\n");
1478 else if (attr == &dev_attr_act_mask) 1461 else if (attr == &dev_attr_act_mask)
1479 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); 1462 ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1480 else if (attr == &dev_attr_pid) 1463 else if (attr == &dev_attr_pid)
1481 ret = sprintf(buf, "%u\n", q->blk_trace->pid); 1464 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1482 else if (attr == &dev_attr_start_lba) 1465 else if (attr == &dev_attr_start_lba)
1483 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); 1466 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1484 else if (attr == &dev_attr_end_lba) 1467 else if (attr == &dev_attr_end_lba)
1485 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); 1468 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1469
1470out_unlock_bdev:
1486 mutex_unlock(&bdev->bd_mutex); 1471 mutex_unlock(&bdev->bd_mutex);
1487out_bdput: 1472out_bdput:
1488 bdput(bdev); 1473 bdput(bdev);
@@ -1499,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1499 struct request_queue *q; 1484 struct request_queue *q;
1500 struct hd_struct *p; 1485 struct hd_struct *p;
1501 u64 value; 1486 u64 value;
1502 ssize_t ret = -ENXIO; 1487 ssize_t ret = -EINVAL;
1503 1488
1504 if (count == 0) 1489 if (count == 0)
1505 goto out; 1490 goto out;
@@ -1507,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1507 if (attr == &dev_attr_act_mask) { 1492 if (attr == &dev_attr_act_mask) {
1508 if (sscanf(buf, "%llx", &value) != 1) { 1493 if (sscanf(buf, "%llx", &value) != 1) {
1509 /* Assume it is a list of trace category names */ 1494 /* Assume it is a list of trace category names */
1510 value = blk_str2act_mask(buf); 1495 ret = blk_trace_str2mask(buf);
1511 if (value < 0) 1496 if (ret < 0)
1512 goto out; 1497 goto out;
1498 value = ret;
1513 } 1499 }
1514 } else if (sscanf(buf, "%llu", &value) != 1) 1500 } else if (sscanf(buf, "%llu", &value) != 1)
1515 goto out; 1501 goto out;
1516 1502
1503 ret = -ENXIO;
1504
1517 lock_kernel(); 1505 lock_kernel();
1518 p = dev_to_part(dev); 1506 p = dev_to_part(dev);
1519 bdev = bdget(part_devt(p)); 1507 bdev = bdget(part_devt(p));
1520 if (bdev == NULL) 1508 if (bdev == NULL)
1521 goto out_unlock_kernel; 1509 goto out_unlock_kernel;
1522 1510
1523 q = bdev_get_queue(bdev); 1511 q = blk_trace_get_queue(bdev);
1524 if (q == NULL) 1512 if (q == NULL)
1525 goto out_bdput; 1513 goto out_bdput;
1526 1514
1527 mutex_lock(&bdev->bd_mutex); 1515 mutex_lock(&bdev->bd_mutex);
1516
1517 if (attr == &dev_attr_enable) {
1518 if (value)
1519 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1520 else
1521 ret = blk_trace_remove_queue(q);
1522 goto out_unlock_bdev;
1523 }
1524
1528 ret = 0; 1525 ret = 0;
1529 if (q->blk_trace == NULL) 1526 if (q->blk_trace == NULL)
1530 ret = blk_trace_setup_queue(q, bdev->bd_dev); 1527 ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1538,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1538 q->blk_trace->start_lba = value; 1535 q->blk_trace->start_lba = value;
1539 else if (attr == &dev_attr_end_lba) 1536 else if (attr == &dev_attr_end_lba)
1540 q->blk_trace->end_lba = value; 1537 q->blk_trace->end_lba = value;
1541 ret = count;
1542 } 1538 }
1539
1540out_unlock_bdev:
1543 mutex_unlock(&bdev->bd_mutex); 1541 mutex_unlock(&bdev->bd_mutex);
1544out_bdput: 1542out_bdput:
1545 bdput(bdev); 1543 bdput(bdev);
1546out_unlock_kernel: 1544out_unlock_kernel:
1547 unlock_kernel(); 1545 unlock_kernel();
1548out: 1546out:
1549 return ret; 1547 return ret ? ret : count;
1550} 1548}
1549
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c7f4a4be05dc..678e3d6caf85 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -928,9 +928,14 @@ void ftrace_release(void *start, unsigned long size)
928 928
929 mutex_lock(&ftrace_lock); 929 mutex_lock(&ftrace_lock);
930 do_for_each_ftrace_rec(pg, rec) { 930 do_for_each_ftrace_rec(pg, rec) {
931 if ((rec->ip >= s) && (rec->ip < e) && 931 if ((rec->ip >= s) && (rec->ip < e)) {
932 !(rec->flags & FTRACE_FL_FREE)) 932 /*
933 * rec->ip is changed in ftrace_free_rec()
934 * It should not between s and e if record was freed.
935 */
936 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
933 ftrace_free_rec(rec); 937 ftrace_free_rec(rec);
938 }
934 } while_for_each_ftrace_rec(); 939 } while_for_each_ftrace_rec();
935 mutex_unlock(&ftrace_lock); 940 mutex_unlock(&ftrace_lock);
936} 941}
@@ -3287,6 +3292,9 @@ void unregister_ftrace_graph(void)
3287{ 3292{
3288 mutex_lock(&ftrace_lock); 3293 mutex_lock(&ftrace_lock);
3289 3294
3295 if (!unlikely(atomic_read(&ftrace_graph_active)))
3296 goto out;
3297
3290 atomic_dec(&ftrace_graph_active); 3298 atomic_dec(&ftrace_graph_active);
3291 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); 3299 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
3292 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 3300 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
@@ -3294,6 +3302,7 @@ void unregister_ftrace_graph(void)
3294 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 3302 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
3295 unregister_pm_notifier(&ftrace_suspend_notifier); 3303 unregister_pm_notifier(&ftrace_suspend_notifier);
3296 3304
3305 out:
3297 mutex_unlock(&ftrace_lock); 3306 mutex_unlock(&ftrace_lock);
3298} 3307}
3299 3308
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index ae201b3eda89..5011f4d91e37 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -6,14 +6,16 @@
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> 6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */ 7 */
8 8
9#include <linux/dcache.h> 9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
10#include <linux/debugfs.h> 11#include <linux/debugfs.h>
12#include <linux/dcache.h>
11#include <linux/fs.h> 13#include <linux/fs.h>
12#include <linux/seq_file.h> 14
13#include <trace/kmemtrace.h> 15#include <trace/kmemtrace.h>
14 16
15#include "trace.h"
16#include "trace_output.h" 17#include "trace_output.h"
18#include "trace.h"
17 19
18/* Select an alternative, minimalistic output than the original one */ 20/* Select an alternative, minimalistic output than the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1 21#define TRACE_KMEM_OPT_MINIMAL 0x1
@@ -25,14 +27,156 @@ static struct tracer_opt kmem_opts[] = {
25}; 27};
26 28
27static struct tracer_flags kmem_tracer_flags = { 29static struct tracer_flags kmem_tracer_flags = {
28 .val = 0, 30 .val = 0,
29 .opts = kmem_opts 31 .opts = kmem_opts
30}; 32};
31 33
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array; 34static struct trace_array *kmemtrace_array;
35 35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct trace_array *tr = kmemtrace_array;
46 struct kmemtrace_alloc_entry *entry;
47 struct ring_buffer_event *event;
48
49 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
50 if (!event)
51 return;
52
53 entry = ring_buffer_event_data(event);
54 tracing_generic_entry_update(&entry->ent, 0, 0);
55
56 entry->ent.type = TRACE_KMEM_ALLOC;
57 entry->type_id = type_id;
58 entry->call_site = call_site;
59 entry->ptr = ptr;
60 entry->bytes_req = bytes_req;
61 entry->bytes_alloc = bytes_alloc;
62 entry->gfp_flags = gfp_flags;
63 entry->node = node;
64
65 ring_buffer_unlock_commit(tr->buffer, event);
66
67 trace_wake_up();
68}
69
70static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
71 unsigned long call_site,
72 const void *ptr)
73{
74 struct trace_array *tr = kmemtrace_array;
75 struct kmemtrace_free_entry *entry;
76 struct ring_buffer_event *event;
77
78 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
79 if (!event)
80 return;
81 entry = ring_buffer_event_data(event);
82 tracing_generic_entry_update(&entry->ent, 0, 0);
83
84 entry->ent.type = TRACE_KMEM_FREE;
85 entry->type_id = type_id;
86 entry->call_site = call_site;
87 entry->ptr = ptr;
88
89 ring_buffer_unlock_commit(tr->buffer, event);
90
91 trace_wake_up();
92}
93
94static void kmemtrace_kmalloc(unsigned long call_site,
95 const void *ptr,
96 size_t bytes_req,
97 size_t bytes_alloc,
98 gfp_t gfp_flags)
99{
100 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
101 bytes_req, bytes_alloc, gfp_flags, -1);
102}
103
104static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
105 const void *ptr,
106 size_t bytes_req,
107 size_t bytes_alloc,
108 gfp_t gfp_flags)
109{
110 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
111 bytes_req, bytes_alloc, gfp_flags, -1);
112}
113
114static void kmemtrace_kmalloc_node(unsigned long call_site,
115 const void *ptr,
116 size_t bytes_req,
117 size_t bytes_alloc,
118 gfp_t gfp_flags,
119 int node)
120{
121 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
122 bytes_req, bytes_alloc, gfp_flags, node);
123}
124
125static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
126 const void *ptr,
127 size_t bytes_req,
128 size_t bytes_alloc,
129 gfp_t gfp_flags,
130 int node)
131{
132 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
133 bytes_req, bytes_alloc, gfp_flags, node);
134}
135
136static void kmemtrace_kfree(unsigned long call_site, const void *ptr)
137{
138 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
139}
140
141static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr)
142{
143 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
144}
145
146static int kmemtrace_start_probes(void)
147{
148 int err;
149
150 err = register_trace_kmalloc(kmemtrace_kmalloc);
151 if (err)
152 return err;
153 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
154 if (err)
155 return err;
156 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node);
157 if (err)
158 return err;
159 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
160 if (err)
161 return err;
162 err = register_trace_kfree(kmemtrace_kfree);
163 if (err)
164 return err;
165 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
166
167 return err;
168}
169
170static void kmemtrace_stop_probes(void)
171{
172 unregister_trace_kmalloc(kmemtrace_kmalloc);
173 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
174 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node);
175 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
176 unregister_trace_kfree(kmemtrace_kfree);
177 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
178}
179
36static int kmem_trace_init(struct trace_array *tr) 180static int kmem_trace_init(struct trace_array *tr)
37{ 181{
38 int cpu; 182 int cpu;
@@ -41,14 +185,14 @@ static int kmem_trace_init(struct trace_array *tr)
41 for_each_cpu_mask(cpu, cpu_possible_map) 185 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu); 186 tracing_reset(tr, cpu);
43 187
44 kmem_tracing_enabled = true; 188 kmemtrace_start_probes();
45 189
46 return 0; 190 return 0;
47} 191}
48 192
49static void kmem_trace_reset(struct trace_array *tr) 193static void kmem_trace_reset(struct trace_array *tr)
50{ 194{
51 kmem_tracing_enabled = false; 195 kmemtrace_stop_probes();
52} 196}
53 197
54static void kmemtrace_headers(struct seq_file *s) 198static void kmemtrace_headers(struct seq_file *s)
@@ -66,47 +210,84 @@ static void kmemtrace_headers(struct seq_file *s)
66} 210}
67 211
68/* 212/*
69 * The two following functions give the original output from kmemtrace, 213 * The following functions give the original output from kmemtrace,
70 * or something close to....perhaps they need some missing things 214 * plus the origin CPU, since reordering occurs in-kernel now.
71 */ 215 */
216
217#define KMEMTRACE_USER_ALLOC 0
218#define KMEMTRACE_USER_FREE 1
219
220struct kmemtrace_user_event {
221 u8 event_id;
222 u8 type_id;
223 u16 event_size;
224 u32 cpu;
225 u64 timestamp;
226 unsigned long call_site;
227 unsigned long ptr;
228};
229
230struct kmemtrace_user_event_alloc {
231 size_t bytes_req;
232 size_t bytes_alloc;
233 unsigned gfp_flags;
234 int node;
235};
236
72static enum print_line_t 237static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter, 238kmemtrace_print_alloc_user(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry) 239 struct kmemtrace_alloc_entry *entry)
75{ 240{
241 struct kmemtrace_user_event_alloc *ev_alloc;
76 struct trace_seq *s = &iter->seq; 242 struct trace_seq *s = &iter->seq;
77 int ret; 243 struct kmemtrace_user_event *ev;
244
245 ev = trace_seq_reserve(s, sizeof(*ev));
246 if (!ev)
247 return TRACE_TYPE_PARTIAL_LINE;
78 248
79 /* Taken from the old linux/kmemtrace.h */ 249 ev->event_id = KMEMTRACE_USER_ALLOC;
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " 250 ev->type_id = entry->type_id;
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", 251 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr, 252 ev->cpu = iter->cpu;
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, 253 ev->timestamp = iter->ts;
84 (unsigned long) entry->gfp_flags, entry->node); 254 ev->call_site = entry->call_site;
255 ev->ptr = (unsigned long)entry->ptr;
85 256
86 if (!ret) 257 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
258 if (!ev_alloc)
87 return TRACE_TYPE_PARTIAL_LINE; 259 return TRACE_TYPE_PARTIAL_LINE;
88 260
261 ev_alloc->bytes_req = entry->bytes_req;
262 ev_alloc->bytes_alloc = entry->bytes_alloc;
263 ev_alloc->gfp_flags = entry->gfp_flags;
264 ev_alloc->node = entry->node;
265
89 return TRACE_TYPE_HANDLED; 266 return TRACE_TYPE_HANDLED;
90} 267}
91 268
92static enum print_line_t 269static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter, 270kmemtrace_print_free_user(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry) 271 struct kmemtrace_free_entry *entry)
95{ 272{
96 struct trace_seq *s = &iter->seq; 273 struct trace_seq *s = &iter->seq;
97 int ret; 274 struct kmemtrace_user_event *ev;
98 275
99 /* Taken from the old linux/kmemtrace.h */ 276 ev = trace_seq_reserve(s, sizeof(*ev));
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", 277 if (!ev)
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE; 278 return TRACE_TYPE_PARTIAL_LINE;
105 279
280 ev->event_id = KMEMTRACE_USER_FREE;
281 ev->type_id = entry->type_id;
282 ev->event_size = sizeof(*ev);
283 ev->cpu = iter->cpu;
284 ev->timestamp = iter->ts;
285 ev->call_site = entry->call_site;
286 ev->ptr = (unsigned long)entry->ptr;
287
106 return TRACE_TYPE_HANDLED; 288 return TRACE_TYPE_HANDLED;
107} 289}
108 290
109
110/* The two other following provide a more minimalistic output */ 291/* The two other following provide a more minimalistic output */
111static enum print_line_t 292static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter, 293kmemtrace_print_alloc_compress(struct trace_iterator *iter,
@@ -178,7 +359,7 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
178 359
179static enum print_line_t 360static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter, 361kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry) 362 struct kmemtrace_free_entry *entry)
182{ 363{
183 struct trace_seq *s = &iter->seq; 364 struct trace_seq *s = &iter->seq;
184 int ret; 365 int ret;
@@ -239,20 +420,22 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
239 switch (entry->type) { 420 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: { 421 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field; 422 struct kmemtrace_alloc_entry *field;
423
242 trace_assign_type(field, entry); 424 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) 425 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field); 426 return kmemtrace_print_alloc_compress(iter, field);
245 else 427 else
246 return kmemtrace_print_alloc_original(iter, field); 428 return kmemtrace_print_alloc_user(iter, field);
247 } 429 }
248 430
249 case TRACE_KMEM_FREE: { 431 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field; 432 struct kmemtrace_free_entry *field;
433
251 trace_assign_type(field, entry); 434 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) 435 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field); 436 return kmemtrace_print_free_compress(iter, field);
254 else 437 else
255 return kmemtrace_print_free_original(iter, field); 438 return kmemtrace_print_free_user(iter, field);
256 } 439 }
257 440
258 default: 441 default:
@@ -260,70 +443,13 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
260 } 443 }
261} 444}
262 445
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
319
320static struct tracer kmem_tracer __read_mostly = { 446static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace", 447 .name = "kmemtrace",
322 .init = kmem_trace_init, 448 .init = kmem_trace_init,
323 .reset = kmem_trace_reset, 449 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line, 450 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers, 451 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags 452 .flags = &kmem_tracer_flags
327}; 453};
328 454
329void kmemtrace_init(void) 455void kmemtrace_init(void)
@@ -335,5 +461,4 @@ static int __init init_kmem_tracer(void)
335{ 461{
336 return register_tracer(&kmem_tracer); 462 return register_tracer(&kmem_tracer);
337} 463}
338
339device_initcall(init_kmem_tracer); 464device_initcall(init_kmem_tracer);
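
The kmemtrace rework above moves the probes in-kernel and replaces the old text output with a fixed binary record per event -- a kmemtrace_user_event header, followed by a kmemtrace_user_event_alloc payload for allocations -- so userspace can re-sort events by cpu and timestamp. A sketch of serializing such a record into a byte buffer (field widths and values are illustrative, not the tool's on-disk ABI):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct user_event {
    uint8_t  event_id;       /* 0 = alloc, 1 = free */
    uint8_t  type_id;
    uint16_t event_size;     /* header plus optional payload, in bytes */
    uint32_t cpu;
    uint64_t timestamp;
    uint64_t call_site;
    uint64_t ptr;
};

struct user_event_alloc {
    uint64_t bytes_req;
    uint64_t bytes_alloc;
    uint32_t gfp_flags;
    int32_t  node;
};

/* Serialize one allocation event; the caller's buffer must be large enough. */
static size_t emit_alloc(unsigned char *buf, uint64_t site, uint64_t ptr,
                         uint64_t req, uint64_t got)
{
    struct user_event ev = {
        .event_id   = 0,
        .event_size = sizeof(struct user_event) + sizeof(struct user_event_alloc),
        .cpu        = 0,
        .timestamp  = 12345,
        .call_site  = site,
        .ptr        = ptr,
    };
    struct user_event_alloc payload = {
        .bytes_req = req, .bytes_alloc = got, .gfp_flags = 0, .node = -1,
    };

    memcpy(buf, &ev, sizeof(ev));
    memcpy(buf + sizeof(ev), &payload, sizeof(payload));
    return ev.event_size;
}

int main(void)
{
    unsigned char buf[128];
    size_t n = emit_alloc(buf, 0xc0de, 0xbeef, 32, 64);

    printf("emitted %zu bytes\n", n);
    return 0;
}
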
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 808b14bbf076..960cbf44c844 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -189,16 +189,65 @@ enum {
189 RB_LEN_TIME_STAMP = 16, 189 RB_LEN_TIME_STAMP = 16,
190}; 190};
191 191
192/* inline for ring buffer fast paths */ 192static inline int rb_null_event(struct ring_buffer_event *event)
193{
194 return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
195}
196
197static inline int rb_discarded_event(struct ring_buffer_event *event)
198{
199 return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
200}
201
202static void rb_event_set_padding(struct ring_buffer_event *event)
203{
204 event->type = RINGBUF_TYPE_PADDING;
205 event->time_delta = 0;
206}
207
208/**
209 * ring_buffer_event_discard - discard an event in the ring buffer
210 * @buffer: the ring buffer
211 * @event: the event to discard
212 *
213 * Sometimes a event that is in the ring buffer needs to be ignored.
214 * This function lets the user discard an event in the ring buffer
215 * and then that event will not be read later.
216 *
217 * Note, it is up to the user to be careful with this, and protect
218 * against races. If the user discards an event that has been consumed
219 * it is possible that it could corrupt the ring buffer.
220 */
221void ring_buffer_event_discard(struct ring_buffer_event *event)
222{
223 event->type = RINGBUF_TYPE_PADDING;
224 /* time delta must be non zero */
225 if (!event->time_delta)
226 event->time_delta = 1;
227}
228
193static unsigned 229static unsigned
194rb_event_length(struct ring_buffer_event *event) 230rb_event_data_length(struct ring_buffer_event *event)
195{ 231{
196 unsigned length; 232 unsigned length;
197 233
234 if (event->len)
235 length = event->len * RB_ALIGNMENT;
236 else
237 length = event->array[0];
238 return length + RB_EVNT_HDR_SIZE;
239}
240
241/* inline for ring buffer fast paths */
242static unsigned
243rb_event_length(struct ring_buffer_event *event)
244{
198 switch (event->type) { 245 switch (event->type) {
199 case RINGBUF_TYPE_PADDING: 246 case RINGBUF_TYPE_PADDING:
200 /* undefined */ 247 if (rb_null_event(event))
201 return -1; 248 /* undefined */
249 return -1;
250 return rb_event_data_length(event);
202 251
203 case RINGBUF_TYPE_TIME_EXTEND: 252 case RINGBUF_TYPE_TIME_EXTEND:
204 return RB_LEN_TIME_EXTEND; 253 return RB_LEN_TIME_EXTEND;
@@ -207,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event)
207 return RB_LEN_TIME_STAMP; 256 return RB_LEN_TIME_STAMP;
208 257
209 case RINGBUF_TYPE_DATA: 258 case RINGBUF_TYPE_DATA:
210 if (event->len) 259 return rb_event_data_length(event);
211 length = event->len * RB_ALIGNMENT;
212 else
213 length = event->array[0];
214 return length + RB_EVNT_HDR_SIZE;
215 default: 260 default:
216 BUG(); 261 BUG();
217 } 262 }
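
The ring buffer changes above overload RINGBUF_TYPE_PADDING: a zero time_delta still marks end-of-page filler, while a non-zero time_delta now marks an event that was written and then discarded, so readers skip its payload but keep the timestamp stream consistent. A small sketch of that two-way encoding (a simplified event layout, not the kernel's packed header):

#include <stdio.h>

enum { TYPE_DATA, TYPE_PADDING };

struct event {
    unsigned type;
    unsigned time_delta;
};

/* End-of-page filler: padding that carries no timestamp information. */
static int is_null_event(const struct event *e)
{
    return e->type == TYPE_PADDING && e->time_delta == 0;
}

/* A real event that was thrown away after being written; its time delta is
 * preserved so later events still decode to the right timestamps. */
static int is_discarded_event(const struct event *e)
{
    return e->type == TYPE_PADDING && e->time_delta != 0;
}

static void discard_event(struct event *e)
{
    e->type = TYPE_PADDING;
    if (!e->time_delta)
        e->time_delta = 1;    /* keep the delta non-zero to stay distinguishable */
}

int main(void)
{
    struct event e = { TYPE_DATA, 42 };

    discard_event(&e);
    printf("null=%d discarded=%d\n", is_null_event(&e), is_discarded_event(&e));
    return 0;
}
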
@@ -518,7 +563,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
518 struct list_head *head = &cpu_buffer->pages; 563 struct list_head *head = &cpu_buffer->pages;
519 struct buffer_page *bpage, *tmp; 564 struct buffer_page *bpage, *tmp;
520 565
521 list_del_init(&cpu_buffer->reader_page->list);
522 free_buffer_page(cpu_buffer->reader_page); 566 free_buffer_page(cpu_buffer->reader_page);
523 567
524 list_for_each_entry_safe(bpage, tmp, head, list) { 568 list_for_each_entry_safe(bpage, tmp, head, list) {
@@ -845,11 +889,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
845} 889}
846EXPORT_SYMBOL_GPL(ring_buffer_resize); 890EXPORT_SYMBOL_GPL(ring_buffer_resize);
847 891
848static inline int rb_null_event(struct ring_buffer_event *event)
849{
850 return event->type == RINGBUF_TYPE_PADDING;
851}
852
853static inline void * 892static inline void *
854__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 893__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
855{ 894{
@@ -1219,7 +1258,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1219 if (tail < BUF_PAGE_SIZE) { 1258 if (tail < BUF_PAGE_SIZE) {
1220 /* Mark the rest of the page with padding */ 1259 /* Mark the rest of the page with padding */
1221 event = __rb_page_index(tail_page, tail); 1260 event = __rb_page_index(tail_page, tail);
1222 event->type = RINGBUF_TYPE_PADDING; 1261 rb_event_set_padding(event);
1223 } 1262 }
1224 1263
1225 if (tail <= BUF_PAGE_SIZE) 1264 if (tail <= BUF_PAGE_SIZE)
@@ -1969,7 +2008,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1969 2008
1970 event = rb_reader_event(cpu_buffer); 2009 event = rb_reader_event(cpu_buffer);
1971 2010
1972 if (event->type == RINGBUF_TYPE_DATA) 2011 if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
1973 cpu_buffer->entries--; 2012 cpu_buffer->entries--;
1974 2013
1975 rb_update_read_stamp(cpu_buffer, event); 2014 rb_update_read_stamp(cpu_buffer, event);
@@ -2052,9 +2091,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2052 2091
2053 switch (event->type) { 2092 switch (event->type) {
2054 case RINGBUF_TYPE_PADDING: 2093 case RINGBUF_TYPE_PADDING:
2055 RB_WARN_ON(cpu_buffer, 1); 2094 if (rb_null_event(event))
2095 RB_WARN_ON(cpu_buffer, 1);
2096 /*
2097 * Because the writer could be discarding every
2098 * event it creates (which would probably be bad)
2099 * if we were to go back to "again" then we may never
2100 * catch up, and will trigger the warn on, or lock
2101 * the box. Return the padding, and we will release
2102 * the current locks, and try again.
2103 */
2056 rb_advance_reader(cpu_buffer); 2104 rb_advance_reader(cpu_buffer);
2057 return NULL; 2105 return event;
2058 2106
2059 case RINGBUF_TYPE_TIME_EXTEND: 2107 case RINGBUF_TYPE_TIME_EXTEND:
2060 /* Internal data, OK to advance */ 2108 /* Internal data, OK to advance */
@@ -2115,8 +2163,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2115 2163
2116 switch (event->type) { 2164 switch (event->type) {
2117 case RINGBUF_TYPE_PADDING: 2165 case RINGBUF_TYPE_PADDING:
2118 rb_inc_iter(iter); 2166 if (rb_null_event(event)) {
2119 goto again; 2167 rb_inc_iter(iter);
2168 goto again;
2169 }
2170 rb_advance_iter(iter);
2171 return event;
2120 2172
2121 case RINGBUF_TYPE_TIME_EXTEND: 2173 case RINGBUF_TYPE_TIME_EXTEND:
2122 /* Internal data, OK to advance */ 2174 /* Internal data, OK to advance */
@@ -2163,10 +2215,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2163 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2215 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2164 return NULL; 2216 return NULL;
2165 2217
2218 again:
2166 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2219 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2167 event = rb_buffer_peek(buffer, cpu, ts); 2220 event = rb_buffer_peek(buffer, cpu, ts);
2168 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2221 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2169 2222
2223 if (event && event->type == RINGBUF_TYPE_PADDING) {
2224 cpu_relax();
2225 goto again;
2226 }
2227
2170 return event; 2228 return event;
2171} 2229}
2172 2230
@@ -2185,10 +2243,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2185 struct ring_buffer_event *event; 2243 struct ring_buffer_event *event;
2186 unsigned long flags; 2244 unsigned long flags;
2187 2245
2246 again:
2188 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2247 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2189 event = rb_iter_peek(iter, ts); 2248 event = rb_iter_peek(iter, ts);
2190 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2249 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2191 2250
2251 if (event && event->type == RINGBUF_TYPE_PADDING) {
2252 cpu_relax();
2253 goto again;
2254 }
2255
2192 return event; 2256 return event;
2193} 2257}
2194 2258
@@ -2207,6 +2271,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2207 struct ring_buffer_event *event = NULL; 2271 struct ring_buffer_event *event = NULL;
2208 unsigned long flags; 2272 unsigned long flags;
2209 2273
2274 again:
2210 /* might be called in atomic */ 2275 /* might be called in atomic */
2211 preempt_disable(); 2276 preempt_disable();
2212 2277
@@ -2228,6 +2293,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2228 out: 2293 out:
2229 preempt_enable(); 2294 preempt_enable();
2230 2295
2296 if (event && event->type == RINGBUF_TYPE_PADDING) {
2297 cpu_relax();
2298 goto again;
2299 }
2300
2231 return event; 2301 return event;
2232} 2302}
2233EXPORT_SYMBOL_GPL(ring_buffer_consume); 2303EXPORT_SYMBOL_GPL(ring_buffer_consume);
@@ -2306,6 +2376,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2306 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2376 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2307 unsigned long flags; 2377 unsigned long flags;
2308 2378
2379 again:
2309 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2380 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2310 event = rb_iter_peek(iter, ts); 2381 event = rb_iter_peek(iter, ts);
2311 if (!event) 2382 if (!event)
@@ -2315,6 +2386,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2315 out: 2386 out:
2316 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2387 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2317 2388
2389 if (event && event->type == RINGBUF_TYPE_PADDING) {
2390 cpu_relax();
2391 goto again;
2392 }
2393
2318 return event; 2394 return event;
2319} 2395}
2320EXPORT_SYMBOL_GPL(ring_buffer_read); 2396EXPORT_SYMBOL_GPL(ring_buffer_read);
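
Editor's aside: with the hunks above, rb_buffer_peek() and rb_iter_peek() can now hand back a RINGBUF_TYPE_PADDING event for entries the writer discarded, and the exported peek/consume/read wrappers loop (cpu_relax + goto again) until they get a real event or NULL. A minimal consumer sketch under that assumption, using the three-argument ring_buffer_consume() shown in this diff; the function name drain_cpu is illustrative only:

static void drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	/* padding/discarded events are skipped inside ring_buffer_consume() */
	while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
		void *data = ring_buffer_event_data(event);
		unsigned int len = ring_buffer_event_length(event);

		/* process 'len' bytes of payload at 'data' here */
		(void)data;
		(void)len;
	}
}
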
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5d1a16cae376..2a81decf99bc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -852,15 +852,25 @@ static void ftrace_trace_stack(struct trace_array *tr,
852static void ftrace_trace_userstack(struct trace_array *tr, 852static void ftrace_trace_userstack(struct trace_array *tr,
853 unsigned long flags, int pc); 853 unsigned long flags, int pc);
854 854
855void trace_buffer_unlock_commit(struct trace_array *tr, 855static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
856 struct ring_buffer_event *event, 856 struct ring_buffer_event *event,
857 unsigned long flags, int pc) 857 unsigned long flags, int pc,
858 int wake)
858{ 859{
859 ring_buffer_unlock_commit(tr->buffer, event); 860 ring_buffer_unlock_commit(tr->buffer, event);
860 861
861 ftrace_trace_stack(tr, flags, 6, pc); 862 ftrace_trace_stack(tr, flags, 6, pc);
862 ftrace_trace_userstack(tr, flags, pc); 863 ftrace_trace_userstack(tr, flags, pc);
863 trace_wake_up(); 864
865 if (wake)
866 trace_wake_up();
867}
868
869void trace_buffer_unlock_commit(struct trace_array *tr,
870 struct ring_buffer_event *event,
871 unsigned long flags, int pc)
872{
873 __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
864} 874}
865 875
866struct ring_buffer_event * 876struct ring_buffer_event *
@@ -874,7 +884,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
874void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 884void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
875 unsigned long flags, int pc) 885 unsigned long flags, int pc)
876{ 886{
877 return trace_buffer_unlock_commit(&global_trace, event, flags, pc); 887 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
888}
889
890void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
891 unsigned long flags, int pc)
892{
893 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
878} 894}
879 895
880void 896void
@@ -900,7 +916,7 @@ trace_function(struct trace_array *tr,
900} 916}
901 917
902#ifdef CONFIG_FUNCTION_GRAPH_TRACER 918#ifdef CONFIG_FUNCTION_GRAPH_TRACER
903static void __trace_graph_entry(struct trace_array *tr, 919static int __trace_graph_entry(struct trace_array *tr,
904 struct ftrace_graph_ent *trace, 920 struct ftrace_graph_ent *trace,
905 unsigned long flags, 921 unsigned long flags,
906 int pc) 922 int pc)
@@ -909,15 +925,17 @@ static void __trace_graph_entry(struct trace_array *tr,
909 struct ftrace_graph_ent_entry *entry; 925 struct ftrace_graph_ent_entry *entry;
910 926
911 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 927 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
912 return; 928 return 0;
913 929
914 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, 930 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
915 sizeof(*entry), flags, pc); 931 sizeof(*entry), flags, pc);
916 if (!event) 932 if (!event)
917 return; 933 return 0;
918 entry = ring_buffer_event_data(event); 934 entry = ring_buffer_event_data(event);
919 entry->graph_ent = *trace; 935 entry->graph_ent = *trace;
920 ring_buffer_unlock_commit(global_trace.buffer, event); 936 ring_buffer_unlock_commit(global_trace.buffer, event);
937
938 return 1;
921} 939}
922 940
923static void __trace_graph_return(struct trace_array *tr, 941static void __trace_graph_return(struct trace_array *tr,
@@ -1138,6 +1156,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1138 struct trace_array_cpu *data; 1156 struct trace_array_cpu *data;
1139 unsigned long flags; 1157 unsigned long flags;
1140 long disabled; 1158 long disabled;
1159 int ret;
1141 int cpu; 1160 int cpu;
1142 int pc; 1161 int pc;
1143 1162
@@ -1153,15 +1172,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1153 disabled = atomic_inc_return(&data->disabled); 1172 disabled = atomic_inc_return(&data->disabled);
1154 if (likely(disabled == 1)) { 1173 if (likely(disabled == 1)) {
1155 pc = preempt_count(); 1174 pc = preempt_count();
1156 __trace_graph_entry(tr, trace, flags, pc); 1175 ret = __trace_graph_entry(tr, trace, flags, pc);
1176 } else {
1177 ret = 0;
1157 } 1178 }
1158 /* Only do the atomic if it is not already set */ 1179 /* Only do the atomic if it is not already set */
1159 if (!test_tsk_trace_graph(current)) 1180 if (!test_tsk_trace_graph(current))
1160 set_tsk_trace_graph(current); 1181 set_tsk_trace_graph(current);
1182
1161 atomic_dec(&data->disabled); 1183 atomic_dec(&data->disabled);
1162 local_irq_restore(flags); 1184 local_irq_restore(flags);
1163 1185
1164 return 1; 1186 return ret;
1165} 1187}
1166 1188
1167void trace_graph_return(struct ftrace_graph_ret *trace) 1189void trace_graph_return(struct ftrace_graph_ret *trace)
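
Editor's aside: the wake/no-wake split added above exists so a path that might discard its event (for example when a filter rejects it, as the event macros later in this merge do) can commit without waking readers. A hedged sketch of that pattern; TRACE_EXAMPLE and struct example_entry are hypothetical stand-ins, and the "value < 0" test stands in for a real filter check:

struct example_entry {
	struct trace_entry	ent;
	int			value;
};

static void example_emit(int value, unsigned long irq_flags, int pc)
{
	struct ring_buffer_event *event;
	struct example_entry *entry;

	event = trace_current_buffer_lock_reserve(TRACE_EXAMPLE,
						  sizeof(*entry),
						  irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	if (entry->value < 0)			/* stand-in for a filter check */
		ring_buffer_event_discard(event);

	/* commit without waking readers for a possibly-discarded event */
	trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
}
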
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e3429a8ab059..fec6521ffa13 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -182,6 +182,12 @@ struct trace_power {
182 struct power_trace state_data; 182 struct power_trace state_data;
183}; 183};
184 184
185enum kmemtrace_type_id {
186 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
187 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
188 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
189};
190
185struct kmemtrace_alloc_entry { 191struct kmemtrace_alloc_entry {
186 struct trace_entry ent; 192 struct trace_entry ent;
187 enum kmemtrace_type_id type_id; 193 enum kmemtrace_type_id type_id;
@@ -483,6 +489,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
483 unsigned long flags, int pc); 489 unsigned long flags, int pc);
484void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 490void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
485 unsigned long flags, int pc); 491 unsigned long flags, int pc);
492void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
493 unsigned long flags, int pc);
486 494
487struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 495struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
488 struct trace_array_cpu *data); 496 struct trace_array_cpu *data);
@@ -778,16 +786,27 @@ enum {
778 TRACE_EVENT_TYPE_RAW = 2, 786 TRACE_EVENT_TYPE_RAW = 2,
779}; 787};
780 788
789struct ftrace_event_field {
790 struct list_head link;
791 char *name;
792 char *type;
793 int offset;
794 int size;
795};
796
781struct ftrace_event_call { 797struct ftrace_event_call {
782 char *name; 798 char *name;
783 char *system; 799 char *system;
784 struct dentry *dir; 800 struct dentry *dir;
785 int enabled; 801 int enabled;
786 int (*regfunc)(void); 802 int (*regfunc)(void);
787 void (*unregfunc)(void); 803 void (*unregfunc)(void);
788 int id; 804 int id;
789 int (*raw_init)(void); 805 int (*raw_init)(void);
790 int (*show_format)(struct trace_seq *s); 806 int (*show_format)(struct trace_seq *s);
807 int (*define_fields)(void);
808 struct list_head fields;
809 struct filter_pred **preds;
791 810
792#ifdef CONFIG_EVENT_PROFILE 811#ifdef CONFIG_EVENT_PROFILE
793 atomic_t profile_count; 812 atomic_t profile_count;
@@ -796,6 +815,51 @@ struct ftrace_event_call {
796#endif 815#endif
797}; 816};
798 817
818struct event_subsystem {
819 struct list_head list;
820 const char *name;
821 struct dentry *entry;
822 struct filter_pred **preds;
823};
824
825#define events_for_each(event) \
826 for (event = __start_ftrace_events; \
827 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
828 event++)
829
830#define MAX_FILTER_PRED 8
831
832struct filter_pred;
833
834typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
835
836struct filter_pred {
837 filter_pred_fn_t fn;
838 u64 val;
839 char *str_val;
840 int str_len;
841 char *field_name;
842 int offset;
843 int not;
844 int or;
845 int compound;
846 int clear;
847};
848
849int trace_define_field(struct ftrace_event_call *call, char *type,
850 char *name, int offset, int size);
851extern void filter_free_pred(struct filter_pred *pred);
852extern void filter_print_preds(struct filter_pred **preds,
853 struct trace_seq *s);
854extern int filter_parse(char **pbuf, struct filter_pred *pred);
855extern int filter_add_pred(struct ftrace_event_call *call,
856 struct filter_pred *pred);
857extern void filter_free_preds(struct ftrace_event_call *call);
858extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
859extern void filter_free_subsystem_preds(struct event_subsystem *system);
860extern int filter_add_subsystem_pred(struct event_subsystem *system,
861 struct filter_pred *pred);
862
799void event_trace_printk(unsigned long ip, const char *fmt, ...); 863void event_trace_printk(unsigned long ip, const char *fmt, ...);
800extern struct ftrace_event_call __start_ftrace_events[]; 864extern struct ftrace_event_call __start_ftrace_events[];
801extern struct ftrace_event_call __stop_ftrace_events[]; 865extern struct ftrace_event_call __stop_ftrace_events[];
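
Editor's aside: a sketch of how the new field/predicate declarations fit together: describe a field once with trace_define_field(), then attach a predicate that filter_add_pred() resolves to the right sized compare function. The struct sample_rec layout and the literal values are illustrative, and call->fields is assumed to have been initialized (the stage-3 macros below do that with INIT_LIST_HEAD):

struct sample_rec {				/* hypothetical raw event layout */
	struct trace_entry	ent;
	int			nr;
};

static int example_add_pred(struct ftrace_event_call *call)
{
	struct filter_pred *pred;
	int err;

	err = trace_define_field(call, "int", "nr",
				 offsetof(struct sample_rec, nr),
				 sizeof(int));
	if (err)
		return err;

	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
	if (!pred)
		return -ENOMEM;

	pred->field_name = kstrdup("nr", GFP_KERNEL);
	if (!pred->field_name) {
		kfree(pred);
		return -ENOMEM;
	}
	pred->val = 42;				/* match records where nr == 42 */

	err = filter_add_pred(call, pred);	/* picks filter_pred_32() for a 4-byte field */
	if (err)
		filter_free_pred(pred);

	return err;
}
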
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3047b56f6637..64ec4d278ffb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -19,6 +19,39 @@
19 19
20static DEFINE_MUTEX(event_mutex); 20static DEFINE_MUTEX(event_mutex);
21 21
22int trace_define_field(struct ftrace_event_call *call, char *type,
23 char *name, int offset, int size)
24{
25 struct ftrace_event_field *field;
26
27 field = kzalloc(sizeof(*field), GFP_KERNEL);
28 if (!field)
29 goto err;
30
31 field->name = kstrdup(name, GFP_KERNEL);
32 if (!field->name)
33 goto err;
34
35 field->type = kstrdup(type, GFP_KERNEL);
36 if (!field->type)
37 goto err;
38
39 field->offset = offset;
40 field->size = size;
41 list_add(&field->link, &call->fields);
42
43 return 0;
44
45err:
46 if (field) {
47 kfree(field->name);
48 kfree(field->type);
49 }
50 kfree(field);
51
52 return -ENOMEM;
53}
54
22static void ftrace_clear_events(void) 55static void ftrace_clear_events(void)
23{ 56{
24 struct ftrace_event_call *call = (void *)__start_ftrace_events; 57 struct ftrace_event_call *call = (void *)__start_ftrace_events;
@@ -343,7 +376,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
343 376
344#undef FIELD 377#undef FIELD
345#define FIELD(type, name) \ 378#define FIELD(type, name) \
346 #type, #name, offsetof(typeof(field), name), sizeof(field.name) 379 #type, "common_" #name, offsetof(typeof(field), name), \
380 sizeof(field.name)
347 381
348static int trace_write_header(struct trace_seq *s) 382static int trace_write_header(struct trace_seq *s)
349{ 383{
@@ -430,6 +464,139 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
430 return r; 464 return r;
431} 465}
432 466
467static ssize_t
468event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
469 loff_t *ppos)
470{
471 struct ftrace_event_call *call = filp->private_data;
472 struct trace_seq *s;
473 int r;
474
475 if (*ppos)
476 return 0;
477
478 s = kmalloc(sizeof(*s), GFP_KERNEL);
479 if (!s)
480 return -ENOMEM;
481
482 trace_seq_init(s);
483
484 filter_print_preds(call->preds, s);
485 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
486
487 kfree(s);
488
489 return r;
490}
491
492static ssize_t
493event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
494 loff_t *ppos)
495{
496 struct ftrace_event_call *call = filp->private_data;
497 char buf[64], *pbuf = buf;
498 struct filter_pred *pred;
499 int err;
500
501 if (cnt >= sizeof(buf))
502 return -EINVAL;
503
504 if (copy_from_user(&buf, ubuf, cnt))
505 return -EFAULT;
506
507 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
508 if (!pred)
509 return -ENOMEM;
510
511 err = filter_parse(&pbuf, pred);
512 if (err < 0) {
513 filter_free_pred(pred);
514 return err;
515 }
516
517 if (pred->clear) {
518 filter_free_preds(call);
519 filter_free_pred(pred);
520 return cnt;
521 }
522
523 if (filter_add_pred(call, pred)) {
524 filter_free_pred(pred);
525 return -EINVAL;
526 }
527
528 *ppos += cnt;
529
530 return cnt;
531}
532
533static ssize_t
534subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
535 loff_t *ppos)
536{
537 struct event_subsystem *system = filp->private_data;
538 struct trace_seq *s;
539 int r;
540
541 if (*ppos)
542 return 0;
543
544 s = kmalloc(sizeof(*s), GFP_KERNEL);
545 if (!s)
546 return -ENOMEM;
547
548 trace_seq_init(s);
549
550 filter_print_preds(system->preds, s);
551 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
552
553 kfree(s);
554
555 return r;
556}
557
558static ssize_t
559subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
560 loff_t *ppos)
561{
562 struct event_subsystem *system = filp->private_data;
563 char buf[64], *pbuf = buf;
564 struct filter_pred *pred;
565 int err;
566
567 if (cnt >= sizeof(buf))
568 return -EINVAL;
569
570 if (copy_from_user(&buf, ubuf, cnt))
571 return -EFAULT;
572
573 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
574 if (!pred)
575 return -ENOMEM;
576
577 err = filter_parse(&pbuf, pred);
578 if (err < 0) {
579 filter_free_pred(pred);
580 return err;
581 }
582
583 if (pred->clear) {
584 filter_free_subsystem_preds(system);
585 filter_free_pred(pred);
586 return cnt;
587 }
588
589 if (filter_add_subsystem_pred(system, pred)) {
590 filter_free_subsystem_preds(system);
591 filter_free_pred(pred);
592 return -EINVAL;
593 }
594
595 *ppos += cnt;
596
597 return cnt;
598}
599
433static const struct seq_operations show_event_seq_ops = { 600static const struct seq_operations show_event_seq_ops = {
434 .start = t_start, 601 .start = t_start,
435 .next = t_next, 602 .next = t_next,
@@ -475,6 +642,18 @@ static const struct file_operations ftrace_event_id_fops = {
475 .read = event_id_read, 642 .read = event_id_read,
476}; 643};
477 644
645static const struct file_operations ftrace_event_filter_fops = {
646 .open = tracing_open_generic,
647 .read = event_filter_read,
648 .write = event_filter_write,
649};
650
651static const struct file_operations ftrace_subsystem_filter_fops = {
652 .open = tracing_open_generic,
653 .read = subsystem_filter_read,
654 .write = subsystem_filter_write,
655};
656
478static struct dentry *event_trace_events_dir(void) 657static struct dentry *event_trace_events_dir(void)
479{ 658{
480 static struct dentry *d_tracer; 659 static struct dentry *d_tracer;
@@ -495,12 +674,6 @@ static struct dentry *event_trace_events_dir(void)
495 return d_events; 674 return d_events;
496} 675}
497 676
498struct event_subsystem {
499 struct list_head list;
500 const char *name;
501 struct dentry *entry;
502};
503
504static LIST_HEAD(event_subsystems); 677static LIST_HEAD(event_subsystems);
505 678
506static struct dentry * 679static struct dentry *
@@ -533,6 +706,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
533 system->name = name; 706 system->name = name;
534 list_add(&system->list, &event_subsystems); 707 list_add(&system->list, &event_subsystems);
535 708
709 system->preds = NULL;
710
536 return system->entry; 711 return system->entry;
537} 712}
538 713
@@ -581,6 +756,20 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
581 call->name); 756 call->name);
582 } 757 }
583 758
759 if (call->define_fields) {
760 ret = call->define_fields();
761 if (ret < 0) {
762 pr_warning("Could not initialize trace point"
763 " events/%s\n", call->name);
764 return ret;
765 }
766 entry = debugfs_create_file("filter", 0644, call->dir, call,
767 &ftrace_event_filter_fops);
768 if (!entry)
769 pr_warning("Could not create debugfs "
770 "'%s/filter' entry\n", call->name);
771 }
772
584 /* A trace may not want to export its format */ 773 /* A trace may not want to export its format */
585 if (!call->show_format) 774 if (!call->show_format)
586 return 0; 775 return 0;
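
Editor's aside: the per-event "filter" file created above is driven entirely from userspace. A small userspace sketch of the round trip through event_filter_write() and event_filter_read(); the debugfs mount point and event name are illustrative and depend on which events a given kernel actually provides, but common_pid is one of the common fields registered by the stage-2 macros:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/events/sched/sched_switch/filter";
	const char *expr = "common_pid != 0\n";
	char buf[256];
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0)
		return 1;

	/* install one predicate; writing "0" instead would clear the filter */
	if (write(fd, expr, strlen(expr)) < 0) {
		close(fd);
		return 1;
	}

	/* read back what filter_print_preds() reports */
	lseek(fd, 0, SEEK_SET);
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}

	close(fd);
	return 0;
}
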
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
new file mode 100644
index 000000000000..026be412f356
--- /dev/null
+++ b/kernel/trace/trace_events_filter.c
@@ -0,0 +1,427 @@
1/*
2 * trace_events_filter - generic event filtering
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */
20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h>
24#include <linux/ctype.h>
25
26#include "trace.h"
27#include "trace_output.h"
28
29static int filter_pred_64(struct filter_pred *pred, void *event)
30{
31 u64 *addr = (u64 *)(event + pred->offset);
32 u64 val = (u64)pred->val;
33 int match;
34
35 match = (val == *addr) ^ pred->not;
36
37 return match;
38}
39
40static int filter_pred_32(struct filter_pred *pred, void *event)
41{
42 u32 *addr = (u32 *)(event + pred->offset);
43 u32 val = (u32)pred->val;
44 int match;
45
46 match = (val == *addr) ^ pred->not;
47
48 return match;
49}
50
51static int filter_pred_16(struct filter_pred *pred, void *event)
52{
53 u16 *addr = (u16 *)(event + pred->offset);
54 u16 val = (u16)pred->val;
55 int match;
56
57 match = (val == *addr) ^ pred->not;
58
59 return match;
60}
61
62static int filter_pred_8(struct filter_pred *pred, void *event)
63{
64 u8 *addr = (u8 *)(event + pred->offset);
65 u8 val = (u8)pred->val;
66 int match;
67
68 match = (val == *addr) ^ pred->not;
69
70 return match;
71}
72
73static int filter_pred_string(struct filter_pred *pred, void *event)
74{
75 char *addr = (char *)(event + pred->offset);
76 int cmp, match;
77
78 cmp = strncmp(addr, pred->str_val, pred->str_len);
79
80 match = (!cmp) ^ pred->not;
81
82 return match;
83}
84
85/* return 1 if event matches, 0 otherwise (discard) */
86int filter_match_preds(struct ftrace_event_call *call, void *rec)
87{
88 int i, matched, and_failed = 0;
89 struct filter_pred *pred;
90
91 for (i = 0; i < MAX_FILTER_PRED; i++) {
92 if (call->preds[i]) {
93 pred = call->preds[i];
94 if (and_failed && !pred->or)
95 continue;
96 matched = pred->fn(pred, rec);
97 if (!matched && !pred->or) {
98 and_failed = 1;
99 continue;
100 } else if (matched && pred->or)
101 return 1;
102 } else
103 break;
104 }
105
106 if (and_failed)
107 return 0;
108
109 return 1;
110}
111
112void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
113{
114 char *field_name;
115 struct filter_pred *pred;
116 int i;
117
118 if (!preds) {
119 trace_seq_printf(s, "none\n");
120 return;
121 }
122
123 for (i = 0; i < MAX_FILTER_PRED; i++) {
124 if (preds[i]) {
125 pred = preds[i];
126 field_name = pred->field_name;
127 if (i)
128 trace_seq_printf(s, pred->or ? "|| " : "&& ");
129 trace_seq_printf(s, "%s ", field_name);
130 trace_seq_printf(s, pred->not ? "!= " : "== ");
131 if (pred->str_val)
132 trace_seq_printf(s, "%s\n", pred->str_val);
133 else
134 trace_seq_printf(s, "%llu\n", pred->val);
135 } else
136 break;
137 }
138}
139
140static struct ftrace_event_field *
141find_event_field(struct ftrace_event_call *call, char *name)
142{
143 struct ftrace_event_field *field;
144
145 list_for_each_entry(field, &call->fields, link) {
146 if (!strcmp(field->name, name))
147 return field;
148 }
149
150 return NULL;
151}
152
153void filter_free_pred(struct filter_pred *pred)
154{
155 if (!pred)
156 return;
157
158 kfree(pred->field_name);
159 kfree(pred->str_val);
160 kfree(pred);
161}
162
163void filter_free_preds(struct ftrace_event_call *call)
164{
165 int i;
166
167 if (call->preds) {
168 for (i = 0; i < MAX_FILTER_PRED; i++)
169 filter_free_pred(call->preds[i]);
170 kfree(call->preds);
171 call->preds = NULL;
172 }
173}
174
175void filter_free_subsystem_preds(struct event_subsystem *system)
176{
177 struct ftrace_event_call *call = __start_ftrace_events;
178 int i;
179
180 if (system->preds) {
181 for (i = 0; i < MAX_FILTER_PRED; i++)
182 filter_free_pred(system->preds[i]);
183 kfree(system->preds);
184 system->preds = NULL;
185 }
186
187 events_for_each(call) {
188 if (!call->name || !call->regfunc)
189 continue;
190
191 if (!strcmp(call->system, system->name))
192 filter_free_preds(call);
193 }
194}
195
196static int __filter_add_pred(struct ftrace_event_call *call,
197 struct filter_pred *pred)
198{
199 int i;
200
201 if (call->preds && !pred->compound)
202 filter_free_preds(call);
203
204 if (!call->preds) {
205 call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
206 GFP_KERNEL);
207 if (!call->preds)
208 return -ENOMEM;
209 }
210
211 for (i = 0; i < MAX_FILTER_PRED; i++) {
212 if (!call->preds[i]) {
213 call->preds[i] = pred;
214 return 0;
215 }
216 }
217
218 return -ENOMEM;
219}
220
221static int is_string_field(const char *type)
222{
223 if (strchr(type, '[') && strstr(type, "char"))
224 return 1;
225
226 return 0;
227}
228
229int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
230{
231 struct ftrace_event_field *field;
232
233 field = find_event_field(call, pred->field_name);
234 if (!field)
235 return -EINVAL;
236
237 pred->offset = field->offset;
238
239 if (is_string_field(field->type)) {
240 if (!pred->str_val)
241 return -EINVAL;
242 pred->fn = filter_pred_string;
243 pred->str_len = field->size;
244 return __filter_add_pred(call, pred);
245 } else {
246 if (pred->str_val)
247 return -EINVAL;
248 }
249
250 switch (field->size) {
251 case 8:
252 pred->fn = filter_pred_64;
253 break;
254 case 4:
255 pred->fn = filter_pred_32;
256 break;
257 case 2:
258 pred->fn = filter_pred_16;
259 break;
260 case 1:
261 pred->fn = filter_pred_8;
262 break;
263 default:
264 return -EINVAL;
265 }
266
267 return __filter_add_pred(call, pred);
268}
269
270static struct filter_pred *copy_pred(struct filter_pred *pred)
271{
272 struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
273 if (!new_pred)
274 return NULL;
275
276 memcpy(new_pred, pred, sizeof(*pred));
277
278 if (pred->field_name) {
279 new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
280 if (!new_pred->field_name) {
281 kfree(new_pred);
282 return NULL;
283 }
284 }
285
286 if (pred->str_val) {
287 new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
288 if (!new_pred->str_val) {
289 filter_free_pred(new_pred);
290 return NULL;
291 }
292 }
293
294 return new_pred;
295}
296
297int filter_add_subsystem_pred(struct event_subsystem *system,
298 struct filter_pred *pred)
299{
300 struct ftrace_event_call *call = __start_ftrace_events;
301 struct filter_pred *event_pred;
302 int i;
303
304 if (system->preds && !pred->compound)
305 filter_free_subsystem_preds(system);
306
307 if (!system->preds) {
308 system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
309 GFP_KERNEL);
310 if (!system->preds)
311 return -ENOMEM;
312 }
313
314 for (i = 0; i < MAX_FILTER_PRED; i++) {
315 if (!system->preds[i]) {
316 system->preds[i] = pred;
317 break;
318 }
319 }
320
321 if (i == MAX_FILTER_PRED)
322 return -EINVAL;
323
324 events_for_each(call) {
325 int err;
326
327 if (!call->name || !call->regfunc)
328 continue;
329
330 if (strcmp(call->system, system->name))
331 continue;
332
333 if (!find_event_field(call, pred->field_name))
334 continue;
335
336 event_pred = copy_pred(pred);
337 if (!event_pred)
338 goto oom;
339
340 err = filter_add_pred(call, event_pred);
341 if (err)
342 filter_free_pred(event_pred);
343 if (err == -ENOMEM)
344 goto oom;
345 }
346
347 return 0;
348
349oom:
350 system->preds[i] = NULL;
351 return -ENOMEM;
352}
353
354int filter_parse(char **pbuf, struct filter_pred *pred)
355{
356 char *tmp, *tok, *val_str = NULL;
357 int tok_n = 0;
358
359 /* field ==/!= number, or/and field ==/!= number, number */
360 while ((tok = strsep(pbuf, " \n"))) {
361 if (tok_n == 0) {
362 if (!strcmp(tok, "0")) {
363 pred->clear = 1;
364 return 0;
365 } else if (!strcmp(tok, "&&")) {
366 pred->or = 0;
367 pred->compound = 1;
368 } else if (!strcmp(tok, "||")) {
369 pred->or = 1;
370 pred->compound = 1;
371 } else
372 pred->field_name = tok;
373 tok_n = 1;
374 continue;
375 }
376 if (tok_n == 1) {
377 if (!pred->field_name)
378 pred->field_name = tok;
379 else if (!strcmp(tok, "!="))
380 pred->not = 1;
381 else if (!strcmp(tok, "=="))
382 pred->not = 0;
383 else {
384 pred->field_name = NULL;
385 return -EINVAL;
386 }
387 tok_n = 2;
388 continue;
389 }
390 if (tok_n == 2) {
391 if (pred->compound) {
392 if (!strcmp(tok, "!="))
393 pred->not = 1;
394 else if (!strcmp(tok, "=="))
395 pred->not = 0;
396 else {
397 pred->field_name = NULL;
398 return -EINVAL;
399 }
400 } else {
401 val_str = tok;
402 break; /* done */
403 }
404 tok_n = 3;
405 continue;
406 }
407 if (tok_n == 3) {
408 val_str = tok;
409 break; /* done */
410 }
411 }
412
413 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
414 if (!pred->field_name)
415 return -ENOMEM;
416
417 pred->val = simple_strtoull(val_str, &tmp, 10);
418 if (tmp == val_str) {
419 pred->str_val = kstrdup(val_str, GFP_KERNEL);
420 if (!pred->str_val)
421 return -ENOMEM;
422 }
423
424 return 0;
425}
426
427
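
Editor's aside: based on the tokenizer above, filter_parse() accepts "field ==|!= value", a leading "&&" or "||" marking a compound predicate to append, and a bare "0" to clear the filter. A minimal in-kernel illustration of one successful parse; the buffer contents and the example_parse name are hypothetical:

static int example_parse(void)
{
	char buf[] = "common_pid != 0";	/* what userspace would write */
	char *pbuf = buf;
	struct filter_pred *pred;
	int err;

	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
	if (!pred)
		return -ENOMEM;

	err = filter_parse(&pbuf, pred);
	/*
	 * On success: pred->field_name == "common_pid", pred->not == 1,
	 * pred->val == 0 and pred->str_val == NULL, so filter_add_pred()
	 * will choose a numeric compare function for this field.
	 */
	if (err < 0)
		filter_free_pred(pred);

	return err;
}
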
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 5117c43f5c67..30743f7d4110 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \
129} 129}
130 130
131#include <trace/trace_event_types.h> 131#include <trace/trace_event_types.h>
132
133#undef __field
134#define __field(type, item) \
135 ret = trace_define_field(event_call, #type, #item, \
136 offsetof(typeof(field), item), \
137 sizeof(field.item)); \
138 if (ret) \
139 return ret;
140
141#undef __array
142#define __array(type, item, len) \
143 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
144 offsetof(typeof(field), item), \
145 sizeof(field.item)); \
146 if (ret) \
147 return ret;
148
149#define __common_field(type, item) \
150 ret = trace_define_field(event_call, #type, "common_" #item, \
151 offsetof(typeof(field.ent), item), \
152 sizeof(field.ent.item)); \
153 if (ret) \
154 return ret;
155
156#undef TRACE_EVENT
157#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
158int \
159ftrace_define_fields_##call(void) \
160{ \
161 struct ftrace_raw_##call field; \
162 struct ftrace_event_call *event_call = &event_##call; \
163 int ret; \
164 \
165 __common_field(unsigned char, type); \
166 __common_field(unsigned char, flags); \
167 __common_field(unsigned char, preempt_count); \
168 __common_field(int, pid); \
169 __common_field(int, tgid); \
170 \
171 tstruct; \
172 \
173 return ret; \
174}
175
176#include <trace/trace_event_types.h>
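
Editor's aside: roughly what the new stage-2 macros expand to for a hypothetical TRACE_EVENT(foo, ...) whose tstruct contains __field(int, bar) and __array(char, comm, 16); everything named foo/bar/comm here is illustrative:

int ftrace_define_fields_foo(void)
{
	struct ftrace_raw_foo field;
	struct ftrace_event_call *event_call = &event_foo;
	int ret;

	/* the five __common_field() lines; only the first is shown */
	ret = trace_define_field(event_call, "unsigned char", "common_type",
				 offsetof(typeof(field.ent), type),
				 sizeof(field.ent.type));
	if (ret)
		return ret;
	/* ... common_flags, common_preempt_count, common_pid, common_tgid ... */

	/* __field(int, bar) */
	ret = trace_define_field(event_call, "int", "bar",
				 offsetof(typeof(field), bar),
				 sizeof(field.bar));
	if (ret)
		return ret;

	/* __array(char, comm, 16): the type string becomes "char[16]" */
	ret = trace_define_field(event_call, "char" "[" "16" "]", "comm",
				 offsetof(typeof(field), comm),
				 sizeof(field.comm));
	if (ret)
		return ret;

	return ret;
}
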
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 6b3261ca988c..9d2fa78cecca 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -204,6 +204,7 @@ static struct ftrace_event_call event_##call; \
204 \ 204 \
205static void ftrace_raw_event_##call(proto) \ 205static void ftrace_raw_event_##call(proto) \
206{ \ 206{ \
207 struct ftrace_event_call *call = &event_##call; \
207 struct ring_buffer_event *event; \ 208 struct ring_buffer_event *event; \
208 struct ftrace_raw_##call *entry; \ 209 struct ftrace_raw_##call *entry; \
209 unsigned long irq_flags; \ 210 unsigned long irq_flags; \
@@ -221,7 +222,11 @@ static void ftrace_raw_event_##call(proto) \
221 \ 222 \
222 assign; \ 223 assign; \
223 \ 224 \
224 trace_current_buffer_unlock_commit(event, irq_flags, pc); \ 225 if (call->preds && !filter_match_preds(call, entry)) \
226 ring_buffer_event_discard(event); \
227 \
228 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
229 \
225} \ 230} \
226 \ 231 \
227static int ftrace_raw_reg_event_##call(void) \ 232static int ftrace_raw_reg_event_##call(void) \
@@ -252,6 +257,7 @@ static int ftrace_raw_init_event_##call(void) \
252 if (!id) \ 257 if (!id) \
253 return -ENODEV; \ 258 return -ENODEV; \
254 event_##call.id = id; \ 259 event_##call.id = id; \
260 INIT_LIST_HEAD(&event_##call.fields); \
255 return 0; \ 261 return 0; \
256} \ 262} \
257 \ 263 \
@@ -264,6 +270,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
264 .regfunc = ftrace_raw_reg_event_##call, \ 270 .regfunc = ftrace_raw_reg_event_##call, \
265 .unregfunc = ftrace_raw_unreg_event_##call, \ 271 .unregfunc = ftrace_raw_unreg_event_##call, \
266 .show_format = ftrace_format_##call, \ 272 .show_format = ftrace_format_##call, \
273 .define_fields = ftrace_define_fields_##call, \
267 _TRACE_PROFILE_INIT(call) \ 274 _TRACE_PROFILE_INIT(call) \
268} 275}
269 276
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 9aa84bde23cd..394f94417e2f 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -91,6 +91,7 @@ struct tracer nop_trace __read_mostly =
91 .name = "nop", 91 .name = "nop",
92 .init = nop_trace_init, 92 .init = nop_trace_init,
93 .reset = nop_trace_reset, 93 .reset = nop_trace_reset,
94 .wait_pipe = poll_wait_pipe,
94#ifdef CONFIG_FTRACE_SELFTEST 95#ifdef CONFIG_FTRACE_SELFTEST
95 .selftest = trace_selftest_startup_nop, 96 .selftest = trace_selftest_startup_nop,
96#endif 97#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a3b6e3fd7044..aeac358ee231 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -147,7 +147,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
147 return 1; 147 return 1;
148} 148}
149 149
150int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) 150int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
151{ 151{
152 if (len > ((PAGE_SIZE - 1) - s->len)) 152 if (len > ((PAGE_SIZE - 1) - s->len))
153 return 0; 153 return 0;
@@ -158,10 +158,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
158 return len; 158 return len;
159} 159}
160 160
161int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 161int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
162{ 162{
163 unsigned char hex[HEX_CHARS]; 163 unsigned char hex[HEX_CHARS];
164 unsigned char *data = mem; 164 const unsigned char *data = mem;
165 int i, j; 165 int i, j;
166 166
167#ifdef __BIG_ENDIAN 167#ifdef __BIG_ENDIAN
@@ -177,6 +177,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
177 return trace_seq_putmem(s, hex, j); 177 return trace_seq_putmem(s, hex, j);
178} 178}
179 179
180void *trace_seq_reserve(struct trace_seq *s, size_t len)
181{
182 void *ret;
183
184 if (len > ((PAGE_SIZE - 1) - s->len))
185 return NULL;
186
187 ret = s->buffer + s->len;
188 s->len += len;
189
190 return ret;
191}
192
180int trace_seq_path(struct trace_seq *s, struct path *path) 193int trace_seq_path(struct trace_seq *s, struct path *path)
181{ 194{
182 unsigned char *p; 195 unsigned char *p;
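
Editor's aside: trace_seq_reserve() above lets a printer claim len bytes of the seq buffer and fill them in place, which suits binary output paths better than formatting through trace_seq_printf(). A hedged sketch; example_emit_binary and its arguments are illustrative:

static int example_emit_binary(struct trace_seq *s, const void *rec, size_t len)
{
	void *dst;

	dst = trace_seq_reserve(s, len);
	if (!dst)
		return 0;	/* seq buffer full; caller should flush and retry */

	memcpy(dst, rec, len);	/* copy the raw record straight into the buffer */
	return 1;
}
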
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 1eac2973374e..91630217fb46 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -31,24 +31,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
31 unsigned long sym_flags); 31 unsigned long sym_flags);
32extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 32extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
33 size_t cnt); 33 size_t cnt);
34int trace_seq_puts(struct trace_seq *s, const char *str); 34extern int trace_seq_puts(struct trace_seq *s, const char *str);
35int trace_seq_putc(struct trace_seq *s, unsigned char c); 35extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
36int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); 36extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
37int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); 37extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
38int trace_seq_path(struct trace_seq *s, struct path *path); 38 size_t len);
39int seq_print_userip_objs(const struct userstack_entry *entry, 39extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
40 struct trace_seq *s, unsigned long sym_flags); 40extern int trace_seq_path(struct trace_seq *s, struct path *path);
41int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, 41extern int seq_print_userip_objs(const struct userstack_entry *entry,
42 unsigned long ip, unsigned long sym_flags); 42 struct trace_seq *s, unsigned long sym_flags);
43extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
44 unsigned long ip, unsigned long sym_flags);
43 45
44int trace_print_context(struct trace_iterator *iter); 46extern int trace_print_context(struct trace_iterator *iter);
45int trace_print_lat_context(struct trace_iterator *iter); 47extern int trace_print_lat_context(struct trace_iterator *iter);
46 48
47struct trace_event *ftrace_find_event(int type); 49extern struct trace_event *ftrace_find_event(int type);
48int register_ftrace_event(struct trace_event *event); 50extern int register_ftrace_event(struct trace_event *event);
49int unregister_ftrace_event(struct trace_event *event); 51extern int unregister_ftrace_event(struct trace_event *event);
50 52
51enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); 53extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
54 int flags);
52 55
53#define MAX_MEMHEX_BYTES 8 56#define MAX_MEMHEX_BYTES 8
54#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) 57#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index f8f48d84b2c3..fdde3a4a94cd 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -125,23 +125,21 @@ static int stat_seq_init(struct tracer_stat_session *session)
125 INIT_LIST_HEAD(&new_entry->list); 125 INIT_LIST_HEAD(&new_entry->list);
126 new_entry->stat = stat; 126 new_entry->stat = stat;
127 127
128 list_for_each_entry(iter_entry, &session->stat_list, list) { 128 list_for_each_entry_reverse(iter_entry, &session->stat_list,
129 list) {
129 130
130 /* Insertion maintaining a descending sort */ 131
131 if (ts->stat_cmp(new_entry->stat, 132 if (ts->stat_cmp(iter_entry->stat,
132 iter_entry->stat) > 0) { 133 new_entry->stat) >= 0) {
133 134
134 list_add_tail(&new_entry->list,
135 &iter_entry->list);
136 break;
137
138 /* The current smaller value */
139 } else if (list_is_last(&iter_entry->list,
140 &session->stat_list)) {
141 list_add(&new_entry->list, &iter_entry->list); 135 list_add(&new_entry->list, &iter_entry->list);
142 break; 136 break;
143 } 137 }
144 } 138 }
139
140 /* The current larger value */
141 if (list_empty(&new_entry->list))
142 list_add(&new_entry->list, &session->stat_list);
145 } 143 }
146exit: 144exit:
147 mutex_unlock(&session->stat_mutex); 145 mutex_unlock(&session->stat_mutex);
@@ -163,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
163 161
164 /* If we are in the beginning of the file, print the headers */ 162 /* If we are in the beginning of the file, print the headers */
165 if (!*pos && session->ts->stat_headers) 163 if (!*pos && session->ts->stat_headers)
166 session->ts->stat_headers(s); 164 return SEQ_START_TOKEN;
167 165
168 return seq_list_start(&session->stat_list, *pos); 166 return seq_list_start(&session->stat_list, *pos);
169} 167}
@@ -172,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
172{ 170{
173 struct tracer_stat_session *session = s->private; 171 struct tracer_stat_session *session = s->private;
174 172
173 if (p == SEQ_START_TOKEN)
174 return seq_list_start(&session->stat_list, *pos);
175
175 return seq_list_next(p, &session->stat_list, pos); 176 return seq_list_next(p, &session->stat_list, pos);
176} 177}
177 178
@@ -186,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v)
186 struct tracer_stat_session *session = s->private; 187 struct tracer_stat_session *session = s->private;
187 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); 188 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
188 189
190 if (v == SEQ_START_TOKEN)
191 return session->ts->stat_headers(s);
192
189 return session->ts->stat_show(s, l->stat); 193 return session->ts->stat_show(s, l->stat);
190} 194}
191 195
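
Editor's aside: the reworked stat_seq_init() insertion above keeps the list in descending order by walking it backwards and placing the new node right after the first entry that compares greater-or-equal; if no such entry exists, the node is a new maximum and goes to the list head. The same pattern in isolation, with a hypothetical node type:

struct example_node {
	struct list_head	list;
	long			key;
};

/* keep 'head' sorted with the largest key first */
static void insert_sorted_desc(struct list_head *head, struct example_node *new)
{
	struct example_node *iter;

	list_for_each_entry_reverse(iter, head, list) {
		if (iter->key >= new->key) {
			list_add(&new->list, &iter->list);	/* right after 'iter' */
			return;
		}
	}

	/* larger than everything already present (or the list was empty) */
	list_add(&new->list, head);
}
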
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index ee533c2e161b..984b9175c13d 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
196 struct pid *pid; 196 struct pid *pid;
197 struct task_struct *tsk; 197 struct task_struct *tsk;
198 198
199 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
200 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
201 seq_printf(s, "\n");
202 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
203
199 pid = find_get_pid(cws->pid); 204 pid = find_get_pid(cws->pid);
200 if (pid) { 205 if (pid) {
201 tsk = get_pid_task(pid, PIDTYPE_PID); 206 tsk = get_pid_task(pid, PIDTYPE_PID);
@@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
208 put_pid(pid); 213 put_pid(pid);
209 } 214 }
210 215
211 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
212 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
213 seq_printf(s, "\n");
214 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
215
216 return 0; 216 return 0;
217} 217}
218 218
219static int workqueue_stat_headers(struct seq_file *s) 219static int workqueue_stat_headers(struct seq_file *s)
220{ 220{
221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
222 seq_printf(s, "# | | | |\n\n"); 222 seq_printf(s, "# | | | |\n");
223 return 0; 223 return 0;
224} 224}
225 225