path: root/kernel/trace
author	Ingo Molnar <mingo@elte.hu>	2009-04-07 07:34:26 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-07 07:34:42 -0400
commit	2e8844e13ab73f1107aea4317a53ff5879f2e1d7 (patch)
tree	36165371cf6fd26d674610f1c6bb5fac50e6e13f /kernel/trace
parent	c78a3956b982418186e40978a51636a2b43221bc (diff)
parent	d508afb437daee7cf07da085b635c44a4ebf9b38 (diff)

Merge branch 'linus' into tracing/hw-branch-tracing

Merge reason: update to latest tracing and ptrace APIs

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/Kconfig			|  16
-rw-r--r--	kernel/trace/Makefile			|   2
-rw-r--r--	kernel/trace/blktrace.c			| 496
-rw-r--r--	kernel/trace/events.c			|   1
-rw-r--r--	kernel/trace/ftrace.c			| 116
-rw-r--r--	kernel/trace/kmemtrace.c		| 319
-rw-r--r--	kernel/trace/ring_buffer.c		| 196
-rw-r--r--	kernel/trace/trace.c			| 274
-rw-r--r--	kernel/trace/trace.h			| 120
-rw-r--r--	kernel/trace/trace_clock.c		|   1
-rw-r--r--	kernel/trace/trace_event_profile.c	|  31
-rw-r--r--	kernel/trace/trace_event_types.h	|   2
-rw-r--r--	kernel/trace/trace_events.c		| 252
-rw-r--r--	kernel/trace/trace_events_filter.c	| 427
-rw-r--r--	kernel/trace/trace_events_stage_2.h	|  45
-rw-r--r--	kernel/trace/trace_events_stage_3.h	|  68
-rw-r--r--	kernel/trace/trace_functions_graph.c	| 239
-rw-r--r--	kernel/trace/trace_mmiotrace.c		|   2
-rw-r--r--	kernel/trace/trace_nop.c		|   1
-rw-r--r--	kernel/trace/trace_output.c		|  74
-rw-r--r--	kernel/trace/trace_output.h		|  44
-rw-r--r--	kernel/trace/trace_power.c		|   8
-rw-r--r--	kernel/trace/trace_printk.c		|   8
-rw-r--r--	kernel/trace/trace_sched_switch.c	|   9
-rw-r--r--	kernel/trace/trace_selftest.c		|  80
-rw-r--r--	kernel/trace/trace_stat.c		|  47
-rw-r--r--	kernel/trace/trace_syscalls.c		| 171
-rw-r--r--	kernel/trace/trace_workqueue.c		|  12
28 files changed, 2299 insertions(+), 762 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 95a0ad191f19..2246141bda4d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -63,7 +63,11 @@ config TRACING
 #
 config TRACING_SUPPORT
 	bool
-	depends on TRACE_IRQFLAGS_SUPPORT
+	# PPC32 has no irqflags tracing support, but it can use most of the
+	# tracers anyway, they were tested to build and work. Note that new
+	# exceptions to this list aren't welcomed, better implement the
+	# irqflags tracing for your architecture.
+	depends on TRACE_IRQFLAGS_SUPPORT || PPC32
 	depends on STACKTRACE_SUPPORT
 	default y

@@ -95,11 +99,10 @@ config FUNCTION_GRAPH_TRACER
 	help
 	  Enable the kernel to trace a function at both its return
 	  and its entry.
-	  It's first purpose is to trace the duration of functions and
-	  draw a call graph for each thread with some informations like
-	  the return value.
-	  This is done by setting the current return address on the current
-	  task structure into a stack of calls.
+	  Its first purpose is to trace the duration of functions and
+	  draw a call graph for each thread with some information like
+	  the return value. This is done by setting the current return
+	  address on the current task structure into a stack of calls.

 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
@@ -182,6 +185,7 @@ config FTRACE_SYSCALLS
182 bool "Trace syscalls" 185 bool "Trace syscalls"
183 depends on HAVE_FTRACE_SYSCALLS 186 depends on HAVE_FTRACE_SYSCALLS
184 select TRACING 187 select TRACING
188 select KALLSYMS
185 help 189 help
186 Basic tracer to catch the syscall entry and exit events. 190 Basic tracer to catch the syscall entry and exit events.
187 191
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c3feea01c3e0..2630f5121ec1 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -44,5 +44,7 @@ obj-$(CONFIG_EVENT_TRACER) += trace_events.o
 obj-$(CONFIG_EVENT_TRACER) += events.o
 obj-$(CONFIG_EVENT_TRACER) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
+obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o

 libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1f32e4edf490..947c5b3f90c4 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
 static unsigned int blktrace_seq __read_mostly = 1;

 static struct trace_array *blk_tr;
-static int __read_mostly blk_tracer_enabled;
+static bool blk_tracer_enabled __read_mostly;

 /* Select an alternative, minimalistic output than the original one */
 #define TRACE_BLK_OPT_CLASSIC	0x1
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
 };

 /* Global reference count of probes */
-static DEFINE_MUTEX(blk_probe_mutex);
 static atomic_t blk_probes_ref = ATOMIC_INIT(0);

-static int blk_register_tracepoints(void);
+static void blk_register_tracepoints(void);
 static void blk_unregister_tracepoints(void);

 /*
@@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
 		       const void *data, size_t len)
 {
 	struct blk_io_trace *t;
+	struct ring_buffer_event *event = NULL;
+	int pc = 0;
+	int cpu = smp_processor_id();
+	bool blk_tracer = blk_tracer_enabled;
+
+	if (blk_tracer) {
+		pc = preempt_count();
+		event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
+						  sizeof(*t) + len,
+						  0, pc);
+		if (!event)
+			return;
+		t = ring_buffer_event_data(event);
+		goto record_it;
+	}

 	if (!bt->rchan)
 		return;

 	t = relay_reserve(bt->rchan, sizeof(*t) + len);
 	if (t) {
-		const int cpu = smp_processor_id();
-
 		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
 		t->time = ktime_to_ns(ktime_get());
+record_it:
 		t->device = bt->dev;
 		t->action = action;
 		t->pid = pid;
 		t->cpu = cpu;
 		t->pdu_len = len;
 		memcpy((void *) t + sizeof(*t), data, len);
+
+		if (blk_tracer)
+			trace_buffer_unlock_commit(blk_tr, event, 0, pc);
 	}
 }

@@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
 	unsigned long flags;
 	char *buf;

-	if (blk_tr) {
-		va_start(args, fmt);
-		ftrace_vprintk(fmt, args);
-		va_end(args);
-		return;
-	}
-
-	if (!bt->msg_data)
+	if (unlikely(bt->trace_state != Blktrace_running &&
+		     !blk_tracer_enabled))
 		return;

 	local_irq_save(flags);
@@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 /*
  * Data direction bit lookup
  */
-static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
-					 BLK_TC_ACT(BLK_TC_WRITE) };
+static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
+				 BLK_TC_ACT(BLK_TC_WRITE) };

 /* The ilog2() calls fall out because they're constant */
 #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
@@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	unsigned long *sequence;
 	pid_t pid;
 	int cpu, pc = 0;
+	bool blk_tracer = blk_tracer_enabled;

-	if (unlikely(bt->trace_state != Blktrace_running ||
-		     !blk_tracer_enabled))
+	if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
 		return;

 	what |= ddir_act[rw & WRITE];
@@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 		return;
 	cpu = raw_smp_processor_id();

-	if (blk_tr) {
+	if (blk_tracer) {
 		tracing_record_cmdline(current);

 		pc = preempt_count();
@@ -236,7 +246,7 @@ record_it:
 		if (pdu_len)
 			memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);

-		if (blk_tr) {
+		if (blk_tracer) {
 			trace_buffer_unlock_commit(blk_tr, event, 0, pc);
 			return;
 		}
@@ -248,7 +258,7 @@ record_it:
 static struct dentry *blk_tree_root;
 static DEFINE_MUTEX(blk_tree_mutex);

-static void blk_trace_cleanup(struct blk_trace *bt)
+static void blk_trace_free(struct blk_trace *bt)
 {
 	debugfs_remove(bt->msg_file);
 	debugfs_remove(bt->dropped_file);
@@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt)
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
 	kfree(bt);
-	mutex_lock(&blk_probe_mutex);
+}
+
+static void blk_trace_cleanup(struct blk_trace *bt)
+{
+	blk_trace_free(bt);
 	if (atomic_dec_and_test(&blk_probes_ref))
 		blk_unregister_tracepoints();
-	mutex_unlock(&blk_probe_mutex);
 }

 int blk_trace_remove(struct request_queue *q)
@@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q)
 	if (!bt)
 		return -EINVAL;

-	if (bt->trace_state == Blktrace_setup ||
-	    bt->trace_state == Blktrace_stopped)
+	if (bt->trace_state != Blktrace_running)
 		blk_trace_cleanup(bt);

 	return 0;
@@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 		if (buts->name[i] == '/')
 			buts->name[i] = '_';

-	ret = -ENOMEM;
 	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
 	if (!bt)
-		goto err;
+		return -ENOMEM;

+	ret = -ENOMEM;
 	bt->sequence = alloc_percpu(unsigned long);
 	if (!bt->sequence)
 		goto err;
@@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,

 	ret = -ENOENT;

+	mutex_lock(&blk_tree_mutex);
 	if (!blk_tree_root) {
 		blk_tree_root = debugfs_create_dir("block", NULL);
-		if (!blk_tree_root)
-			return -ENOMEM;
+		if (!blk_tree_root) {
+			mutex_unlock(&blk_tree_mutex);
+			goto err;
+		}
 	}
+	mutex_unlock(&blk_tree_mutex);

 	dir = debugfs_create_dir(buts->name, blk_tree_root);

@@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;

-	mutex_lock(&blk_probe_mutex);
-	if (atomic_add_return(1, &blk_probes_ref) == 1) {
-		ret = blk_register_tracepoints();
-		if (ret)
-			goto probe_err;
-	}
-	mutex_unlock(&blk_probe_mutex);
-
 	ret = -EBUSY;
 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt) {
@@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 		goto err;
 	}

+	if (atomic_inc_return(&blk_probes_ref) == 1)
+		blk_register_tracepoints();
+
 	return 0;
-probe_err:
-	atomic_dec(&blk_probes_ref);
-	mutex_unlock(&blk_probe_mutex);
 err:
-	if (bt) {
-		if (bt->msg_file)
-			debugfs_remove(bt->msg_file);
-		if (bt->dropped_file)
-			debugfs_remove(bt->dropped_file);
-		free_percpu(bt->sequence);
-		free_percpu(bt->msg_data);
-		if (bt->rchan)
-			relay_close(bt->rchan);
-		kfree(bt);
-	}
+	blk_trace_free(bt);
 	return ret;
 }

@@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);

-static int blk_register_tracepoints(void)
+static void blk_register_tracepoints(void)
 {
 	int ret;

@@ -901,7 +899,6 @@ static int blk_register_tracepoints(void)
 	WARN_ON(ret);
 	ret = register_trace_block_remap(blk_add_trace_remap);
 	WARN_ON(ret);
-	return 0;
 }

 static void blk_unregister_tracepoints(void)
@@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void)
 static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
 {
 	int i = 0;
+	int tc = t->action >> BLK_TC_SHIFT;
+
+	if (t->action == BLK_TN_MESSAGE) {
+		rwbs[i++] = 'N';
+		goto out;
+	}

-	if (t->action & BLK_TC_DISCARD)
+	if (tc & BLK_TC_DISCARD)
 		rwbs[i++] = 'D';
-	else if (t->action & BLK_TC_WRITE)
+	else if (tc & BLK_TC_WRITE)
 		rwbs[i++] = 'W';
 	else if (t->bytes)
 		rwbs[i++] = 'R';
 	else
 		rwbs[i++] = 'N';

-	if (t->action & BLK_TC_AHEAD)
+	if (tc & BLK_TC_AHEAD)
 		rwbs[i++] = 'A';
-	if (t->action & BLK_TC_BARRIER)
+	if (tc & BLK_TC_BARRIER)
 		rwbs[i++] = 'B';
-	if (t->action & BLK_TC_SYNC)
+	if (tc & BLK_TC_SYNC)
 		rwbs[i++] = 'S';
-	if (t->action & BLK_TC_META)
+	if (tc & BLK_TC_META)
 		rwbs[i++] = 'M';
-
+out:
 	rwbs[i] = '\0';
 }

@@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent)

 static inline __u16 t_error(const struct trace_entry *ent)
 {
-	return te_blk_io_trace(ent)->sector;
+	return te_blk_io_trace(ent)->error;
 }

 static __u64 get_pdu_int(const struct trace_entry *ent)
@@ -999,35 +1002,39 @@ static void get_pdu_remap(const struct trace_entry *ent,
 	r->sector = be64_to_cpu(sector);
 }

-static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
+typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
+
+static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
 {
 	char rwbs[6];
-	unsigned long long ts = ns2usecs(iter->ts);
-	unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
+	unsigned long long ts = iter->ts;
+	unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
 	unsigned secs = (unsigned long)ts;
-	const struct trace_entry *ent = iter->ent;
-	const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
+	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

 	fill_rwbs(rwbs, t);

 	return trace_seq_printf(&iter->seq,
-				"%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
+				"%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
 				MAJOR(t->device), MINOR(t->device), iter->cpu,
-				secs, usec_rem, ent->pid, act, rwbs);
+				secs, nsec_rem, iter->ent->pid, act, rwbs);
 }

-static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
-			      const char *act)
+static int blk_log_action(struct trace_iterator *iter, const char *act)
 {
 	char rwbs[6];
+	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
+
 	fill_rwbs(rwbs, t);
-	return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
+	return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
 				MAJOR(t->device), MINOR(t->device), act, rwbs);
 }

 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
 {
-	const char *cmd = trace_find_cmdline(ent->pid);
+	char cmd[TASK_COMM_LEN];
+
+	trace_find_cmdline(ent->pid, cmd);

 	if (t_sec(ent))
 		return trace_seq_printf(s, "%llu + %u [%s]\n",
@@ -1057,19 +1064,41 @@ static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)

 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
 {
-	return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
+	char cmd[TASK_COMM_LEN];
+
+	trace_find_cmdline(ent->pid, cmd);
+
+	return trace_seq_printf(s, "[%s]\n", cmd);
 }

 static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
 {
-	return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
-				get_pdu_int(ent));
+	char cmd[TASK_COMM_LEN];
+
+	trace_find_cmdline(ent->pid, cmd);
+
+	return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent));
 }

 static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
 {
+	char cmd[TASK_COMM_LEN];
+
+	trace_find_cmdline(ent->pid, cmd);
+
 	return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
-				get_pdu_int(ent), trace_find_cmdline(ent->pid));
+				get_pdu_int(ent), cmd);
+}
+
+static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
+{
+	int ret;
+	const struct blk_io_trace *t = te_blk_io_trace(ent);
+
+	ret = trace_seq_putmem(s, t + 1, t->pdu_len);
+	if (ret)
+		return trace_seq_putc(s, '\n');
+	return ret;
 }

 /*
@@ -1086,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m)

 static void blk_tracer_start(struct trace_array *tr)
 {
-	mutex_lock(&blk_probe_mutex);
-	if (atomic_add_return(1, &blk_probes_ref) == 1)
-		if (blk_register_tracepoints())
-			atomic_dec(&blk_probes_ref);
-	mutex_unlock(&blk_probe_mutex);
+	blk_tracer_enabled = true;
 	trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
 }

@@ -1098,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr)
 {
 	blk_tr = tr;
 	blk_tracer_start(tr);
-	mutex_lock(&blk_probe_mutex);
-	blk_tracer_enabled++;
-	mutex_unlock(&blk_probe_mutex);
 	return 0;
 }

 static void blk_tracer_stop(struct trace_array *tr)
 {
+	blk_tracer_enabled = false;
 	trace_flags |= TRACE_ITER_CONTEXT_INFO;
-	mutex_lock(&blk_probe_mutex);
-	if (atomic_dec_and_test(&blk_probes_ref))
-		blk_unregister_tracepoints();
-	mutex_unlock(&blk_probe_mutex);
 }

 static void blk_tracer_reset(struct trace_array *tr)
 {
-	if (!atomic_read(&blk_probes_ref))
-		return;
-
-	mutex_lock(&blk_probe_mutex);
-	blk_tracer_enabled--;
-	WARN_ON(blk_tracer_enabled < 0);
-	mutex_unlock(&blk_probe_mutex);
-
 	blk_tracer_stop(tr);
 }

-static struct {
+static const struct {
 	const char *act[2];
 	int (*print)(struct trace_seq *s, const struct trace_entry *ent);
-} what2act[] __read_mostly = {
+} what2act[] = {
 	[__BLK_TA_QUEUE]	= {{ "Q", "queue" },	  blk_log_generic },
 	[__BLK_TA_BACKMERGE]	= {{ "M", "backmerge" },  blk_log_generic },
 	[__BLK_TA_FRONTMERGE]	= {{ "F", "frontmerge" }, blk_log_generic },
@@ -1147,29 +1158,48 @@ static struct {
 	[__BLK_TA_REMAP]	= {{ "A", "remap" },	  blk_log_remap },
 };

-static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
-					       int flags)
+static enum print_line_t print_one_line(struct trace_iterator *iter,
+					bool classic)
 {
 	struct trace_seq *s = &iter->seq;
-	const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
-	const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+	const struct blk_io_trace *t;
+	u16 what;
 	int ret;
+	bool long_act;
+	blk_log_action_t *log_action;

-	if (!trace_print_context(iter))
-		return TRACE_TYPE_PARTIAL_LINE;
+	t = te_blk_io_trace(iter->ent);
+	what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+	long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
+	log_action = classic ? &blk_log_action_classic : &blk_log_action;

-	if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
+	if (t->action == BLK_TN_MESSAGE) {
+		ret = log_action(iter, long_act ? "message" : "m");
+		if (ret)
+			ret = blk_log_msg(s, iter->ent);
+		goto out;
+	}
+
+	if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
 		ret = trace_seq_printf(s, "Bad pc action %x\n", what);
 	else {
-		const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
-		ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
+		ret = log_action(iter, what2act[what].act[long_act]);
 		if (ret)
 			ret = what2act[what].print(s, iter->ent);
 	}
-
+out:
 	return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 }

+static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
+					       int flags)
+{
+	if (!trace_print_context(iter))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return print_one_line(iter, false);
+}
+
 static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -1177,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
 	const int offset = offsetof(struct blk_io_trace, sector);
 	struct blk_io_trace old = {
 		.magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
-		.time = ns2usecs(iter->ts),
+		.time = iter->ts,
 	};

 	if (!trace_seq_putmem(s, &old, offset))
@@ -1195,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags)

 static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
 {
-	const struct blk_io_trace *t;
-	u16 what;
-	int ret;
-
 	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
 		return TRACE_TYPE_UNHANDLED;

-	t = (const struct blk_io_trace *)iter->ent;
-	what = t->action & ((1 << BLK_TC_SHIFT) - 1);
-
-	if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
-		ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
-	else {
-		const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
-		ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
-		if (ret)
-			ret = what2act[what].print(&iter->seq, iter->ent);
-	}
-
-	return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+	return print_one_line(iter, true);
 }

 static struct tracer blk_tracer __read_mostly = {
@@ -1260,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
 	if (bt == NULL)
 		return -EINVAL;

-	kfree(bt);
+	if (atomic_dec_and_test(&blk_probes_ref))
+		blk_unregister_tracepoints();
+
+	blk_trace_free(bt);
 	return 0;
 }

@@ -1270,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q)
 static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
 {
 	struct blk_trace *old_bt, *bt = NULL;
-	int ret;
+	int ret = -ENOMEM;

-	ret = -ENOMEM;
 	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
 	if (!bt)
-		goto err;
+		return -ENOMEM;
+
+	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
+	if (!bt->msg_data)
+		goto free_bt;

 	bt->dev = dev;
 	bt->act_mask = (u16)-1;
 	bt->end_lba = -1ULL;
-	bt->trace_state = Blktrace_running;

 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt != NULL) {
 		(void)xchg(&q->blk_trace, old_bt);
-		kfree(bt);
 		ret = -EBUSY;
+		goto free_bt;
 	}
+
+	if (atomic_inc_return(&blk_probes_ref) == 1)
+		blk_register_tracepoints();
 	return 0;
-err:
+
+free_bt:
+	blk_trace_free(bt);
 	return ret;
 }

@@ -1297,72 +1321,6 @@ err:
  * sysfs interface to enable and configure tracing
  */

-static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
-{
-	struct hd_struct *p = dev_to_part(dev);
-	struct block_device *bdev;
-	ssize_t ret = -ENXIO;
-
-	lock_kernel();
-	bdev = bdget(part_devt(p));
-	if (bdev != NULL) {
-		struct request_queue *q = bdev_get_queue(bdev);
-
-		if (q != NULL) {
-			mutex_lock(&bdev->bd_mutex);
-			ret = sprintf(buf, "%u\n", !!q->blk_trace);
-			mutex_unlock(&bdev->bd_mutex);
-		}
-
-		bdput(bdev);
-	}
-
-	unlock_kernel();
-	return ret;
-}
-
-static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
-					    struct device_attribute *attr,
-					    const char *buf, size_t count)
-{
-	struct block_device *bdev;
-	struct request_queue *q;
-	struct hd_struct *p;
-	int value;
-	ssize_t ret = -ENXIO;
-
-	if (count == 0 || sscanf(buf, "%d", &value) != 1)
-		goto out;
-
-	lock_kernel();
-	p = dev_to_part(dev);
-	bdev = bdget(part_devt(p));
-	if (bdev == NULL)
-		goto out_unlock_kernel;
-
-	q = bdev_get_queue(bdev);
-	if (q == NULL)
-		goto out_bdput;
-
-	mutex_lock(&bdev->bd_mutex);
-	if (value)
-		ret = blk_trace_setup_queue(q, bdev->bd_dev);
-	else
-		ret = blk_trace_remove_queue(q);
-	mutex_unlock(&bdev->bd_mutex);
-
-	if (ret == 0)
-		ret = count;
-out_bdput:
-	bdput(bdev);
-out_unlock_kernel:
-	unlock_kernel();
-out:
-	return ret;
-}
-
 static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf);
@@ -1374,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 		       sysfs_blk_trace_attr_show, \
 		       sysfs_blk_trace_attr_store)

-static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
-		   sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(enable);
 static BLK_TRACE_DEVICE_ATTR(act_mask);
 static BLK_TRACE_DEVICE_ATTR(pid);
 static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1395,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = {
 	.attrs = blk_trace_attrs,
 };

-static int blk_str2act_mask(const char *str)
+static const struct {
+	int mask;
+	const char *str;
+} mask_maps[] = {
+	{ BLK_TC_READ,		"read"		},
+	{ BLK_TC_WRITE,		"write"		},
+	{ BLK_TC_BARRIER,	"barrier"	},
+	{ BLK_TC_SYNC,		"sync"		},
+	{ BLK_TC_QUEUE,		"queue"		},
+	{ BLK_TC_REQUEUE,	"requeue"	},
+	{ BLK_TC_ISSUE,		"issue"		},
+	{ BLK_TC_COMPLETE,	"complete"	},
+	{ BLK_TC_FS,		"fs"		},
+	{ BLK_TC_PC,		"pc"		},
+	{ BLK_TC_AHEAD,		"ahead"		},
+	{ BLK_TC_META,		"meta"		},
+	{ BLK_TC_DISCARD,	"discard"	},
+	{ BLK_TC_DRV_DATA,	"drv_data"	},
+};
+
+static int blk_trace_str2mask(const char *str)
 {
+	int i;
 	int mask = 0;
-	char *copy = kstrdup(str, GFP_KERNEL), *s;
+	char *s, *token;

-	if (copy == NULL)
+	s = kstrdup(str, GFP_KERNEL);
+	if (s == NULL)
 		return -ENOMEM;
-
-	s = strstrip(copy);
+	s = strstrip(s);

 	while (1) {
-		char *sep = strchr(s, ',');
-
-		if (sep != NULL)
-			*sep = '\0';
-
-		if (strcasecmp(s, "barrier") == 0)
-			mask |= BLK_TC_BARRIER;
-		else if (strcasecmp(s, "complete") == 0)
-			mask |= BLK_TC_COMPLETE;
-		else if (strcasecmp(s, "fs") == 0)
-			mask |= BLK_TC_FS;
-		else if (strcasecmp(s, "issue") == 0)
-			mask |= BLK_TC_ISSUE;
-		else if (strcasecmp(s, "pc") == 0)
-			mask |= BLK_TC_PC;
-		else if (strcasecmp(s, "queue") == 0)
-			mask |= BLK_TC_QUEUE;
-		else if (strcasecmp(s, "read") == 0)
-			mask |= BLK_TC_READ;
-		else if (strcasecmp(s, "requeue") == 0)
-			mask |= BLK_TC_REQUEUE;
-		else if (strcasecmp(s, "sync") == 0)
-			mask |= BLK_TC_SYNC;
-		else if (strcasecmp(s, "write") == 0)
-			mask |= BLK_TC_WRITE;
-
-		if (sep == NULL)
+		token = strsep(&s, ",");
+		if (token == NULL)
 			break;

-		s = sep + 1;
+		if (*token == '\0')
+			continue;
+
+		for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
+			if (strcasecmp(token, mask_maps[i].str) == 0) {
+				mask |= mask_maps[i].mask;
+				break;
+			}
+		}
+		if (i == ARRAY_SIZE(mask_maps)) {
+			mask = -EINVAL;
+			break;
+		}
 	}
-	kfree(copy);
+	kfree(s);

 	return mask;
 }

+static ssize_t blk_trace_mask2str(char *buf, int mask)
+{
+	int i;
+	char *p = buf;
+
+	for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
+		if (mask & mask_maps[i].mask) {
+			p += sprintf(p, "%s%s",
+				    (p == buf) ? "" : ",", mask_maps[i].str);
+		}
+	}
+	*p++ = '\n';
+
+	return p - buf;
+}
+
+static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
+{
+	if (bdev->bd_disk == NULL)
+		return NULL;
+
+	return bdev_get_queue(bdev);
+}
+
 static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -1456,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 	if (bdev == NULL)
 		goto out_unlock_kernel;

-	q = bdev_get_queue(bdev);
+	q = blk_trace_get_queue(bdev);
 	if (q == NULL)
 		goto out_bdput;
+
 	mutex_lock(&bdev->bd_mutex);
+
+	if (attr == &dev_attr_enable) {
+		ret = sprintf(buf, "%u\n", !!q->blk_trace);
+		goto out_unlock_bdev;
+	}
+
 	if (q->blk_trace == NULL)
 		ret = sprintf(buf, "disabled\n");
 	else if (attr == &dev_attr_act_mask)
-		ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
+		ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
 	else if (attr == &dev_attr_pid)
 		ret = sprintf(buf, "%u\n", q->blk_trace->pid);
 	else if (attr == &dev_attr_start_lba)
 		ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
 	else if (attr == &dev_attr_end_lba)
 		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+
+out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
 out_bdput:
 	bdput(bdev);
@@ -1486,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	struct request_queue *q;
 	struct hd_struct *p;
 	u64 value;
-	ssize_t ret = -ENXIO;
+	ssize_t ret = -EINVAL;

 	if (count == 0)
 		goto out;
@@ -1494,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	if (attr == &dev_attr_act_mask) {
 		if (sscanf(buf, "%llx", &value) != 1) {
 			/* Assume it is a list of trace category names */
-			value = blk_str2act_mask(buf);
-			if (value < 0)
+			ret = blk_trace_str2mask(buf);
+			if (ret < 0)
 				goto out;
+			value = ret;
 		}
 	} else if (sscanf(buf, "%llu", &value) != 1)
 		goto out;

+	ret = -ENXIO;
+
 	lock_kernel();
 	p = dev_to_part(dev);
 	bdev = bdget(part_devt(p));
 	if (bdev == NULL)
 		goto out_unlock_kernel;

-	q = bdev_get_queue(bdev);
+	q = blk_trace_get_queue(bdev);
 	if (q == NULL)
 		goto out_bdput;

 	mutex_lock(&bdev->bd_mutex);
+
+	if (attr == &dev_attr_enable) {
+		if (value)
+			ret = blk_trace_setup_queue(q, bdev->bd_dev);
+		else
+			ret = blk_trace_remove_queue(q);
+		goto out_unlock_bdev;
+	}
+
 	ret = 0;
 	if (q->blk_trace == NULL)
 		ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1525,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 			q->blk_trace->start_lba = value;
 		else if (attr == &dev_attr_end_lba)
 			q->blk_trace->end_lba = value;
-		ret = count;
 	}
+
+out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
 out_bdput:
 	bdput(bdev);
 out_unlock_kernel:
 	unlock_kernel();
 out:
-	return ret;
+	return ret ? ret : count;
 }
+
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
index 9fc918da404f..246f2aa6dc46 100644
--- a/kernel/trace/events.c
+++ b/kernel/trace/events.c
@@ -12,4 +12,3 @@
12#include "trace_events_stage_2.h" 12#include "trace_events_stage_2.h"
13#include "trace_events_stage_3.h" 13#include "trace_events_stage_3.h"
14 14
15#include <trace/trace_event_types.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d33d306bdcf4..f1ed080406c3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,6 +29,8 @@
 #include <linux/list.h>
 #include <linux/hash.h>

+#include <trace/sched.h>
+
 #include <asm/ftrace.h>

 #include "trace.h"
@@ -272,7 +274,7 @@ enum {

 static int ftrace_filtered;

-static LIST_HEAD(ftrace_new_addrs);
+static struct dyn_ftrace *ftrace_new_addrs;

 static DEFINE_MUTEX(ftrace_regex_lock);

@@ -339,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec)

 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
-	rec->ip = (unsigned long)ftrace_free_records;
+	rec->freelist = ftrace_free_records;
 	ftrace_free_records = rec;
 	rec->flags |= FTRACE_FL_FREE;
 }
@@ -356,8 +358,14 @@ void ftrace_release(void *start, unsigned long size)

 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e))
+		if ((rec->ip >= s) && (rec->ip < e)) {
+			/*
+			 * rec->ip is changed in ftrace_free_rec()
+			 * It should not between s and e if record was freed.
+			 */
+			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
 			ftrace_free_rec(rec);
+		}
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
 }
@@ -376,7 +384,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 			return NULL;
 		}

-		ftrace_free_records = (void *)rec->ip;
+		ftrace_free_records = rec->freelist;
 		memset(rec, 0, sizeof(*rec));
 		return rec;
 	}
@@ -408,8 +416,8 @@ ftrace_record_ip(unsigned long ip)
 		return NULL;

 	rec->ip = ip;
-
-	list_add(&rec->list, &ftrace_new_addrs);
+	rec->newlist = ftrace_new_addrs;
+	ftrace_new_addrs = rec;

 	return rec;
 }
@@ -531,11 +539,12 @@ static void ftrace_replace_code(int enable)

 	do_for_each_ftrace_rec(pg, rec) {
 		/*
-		 * Skip over free records and records that have
-		 * failed.
+		 * Skip over free records, records that have
+		 * failed and not converted.
 		 */
 		if (rec->flags & FTRACE_FL_FREE ||
-		    rec->flags & FTRACE_FL_FAILED)
+		    rec->flags & FTRACE_FL_FAILED ||
+		    !(rec->flags & FTRACE_FL_CONVERTED))
 			continue;

 		/* ignore updates to this record's mcount site */
@@ -547,7 +556,7 @@ static void ftrace_replace_code(int enable)
 		}

 		failed = __ftrace_replace_code(rec, enable);
-		if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
+		if (failed) {
 			rec->flags |= FTRACE_FL_FAILED;
 			if ((system_state == SYSTEM_BOOTING) ||
 			    !core_kernel_text(rec->ip)) {
@@ -714,19 +723,21 @@ unsigned long ftrace_update_tot_cnt;

 static int ftrace_update_code(struct module *mod)
 {
-	struct dyn_ftrace *p, *t;
+	struct dyn_ftrace *p;
 	cycle_t start, stop;

 	start = ftrace_now(raw_smp_processor_id());
 	ftrace_update_cnt = 0;

-	list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
+	while (ftrace_new_addrs) {

 		/* If something went wrong, bail without enabling anything */
 		if (unlikely(ftrace_disabled))
 			return -1;

-		list_del_init(&p->list);
+		p = ftrace_new_addrs;
+		ftrace_new_addrs = p->newlist;
+		p->flags = 0L;

 		/* convert record (i.e, patch mcount-call with NOP) */
 		if (ftrace_code_disable(mod, p)) {
@@ -1118,16 +1129,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 	return ftrace_regex_open(inode, file, 0);
 }

-static ssize_t
-ftrace_regex_read(struct file *file, char __user *ubuf,
-		  size_t cnt, loff_t *ppos)
-{
-	if (file->f_mode & FMODE_READ)
-		return seq_read(file, ubuf, cnt, ppos);
-	else
-		return -EPERM;
-}
-
 static loff_t
 ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 {
@@ -1880,7 +1881,7 @@ static const struct file_operations ftrace_failures_fops = {

 static const struct file_operations ftrace_filter_fops = {
 	.open = ftrace_filter_open,
-	.read = ftrace_regex_read,
+	.read = seq_read,
 	.write = ftrace_filter_write,
 	.llseek = ftrace_regex_lseek,
 	.release = ftrace_filter_release,
@@ -1888,7 +1889,7 @@ static const struct file_operations ftrace_filter_fops = {

 static const struct file_operations ftrace_notrace_fops = {
 	.open = ftrace_notrace_open,
-	.read = ftrace_regex_read,
+	.read = seq_read,
 	.write = ftrace_notrace_write,
 	.llseek = ftrace_regex_lseek,
 	.release = ftrace_notrace_release,
@@ -1990,16 +1991,6 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 	return ret;
 }

-static ssize_t
-ftrace_graph_read(struct file *file, char __user *ubuf,
-		  size_t cnt, loff_t *ppos)
-{
-	if (file->f_mode & FMODE_READ)
-		return seq_read(file, ubuf, cnt, ppos);
-	else
-		return -EPERM;
-}
-
 static int
 ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 {
@@ -2130,7 +2121,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,

 static const struct file_operations ftrace_graph_fops = {
 	.open = ftrace_graph_open,
-	.read = ftrace_graph_read,
+	.read = seq_read,
 	.write = ftrace_graph_write,
 };
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -2278,7 +2269,7 @@ ftrace_pid_read(struct file *file, char __user *ubuf,
 	if (ftrace_pid_trace == ftrace_swapper_pid)
 		r = sprintf(buf, "swapper tasks\n");
 	else if (ftrace_pid_trace)
-		r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
+		r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
 	else
 		r = sprintf(buf, "no pid\n");

@@ -2606,6 +2597,38 @@ free:
 	return ret;
 }

+static void
+ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev,
+				struct task_struct *next)
+{
+	unsigned long long timestamp;
+	int index;
+
+	/*
+	 * Does the user want to count the time a function was asleep.
+	 * If so, do not update the time stamps.
+	 */
+	if (trace_flags & TRACE_ITER_SLEEP_TIME)
+		return;
+
+	timestamp = trace_clock_local();
+
+	prev->ftrace_timestamp = timestamp;
+
+	/* only process tasks that we timestamped */
+	if (!next->ftrace_timestamp)
+		return;
+
+	/*
+	 * Update all the counters in next to make up for the
+	 * time next was sleeping.
+	 */
+	timestamp -= next->ftrace_timestamp;
+
+	for (index = next->curr_ret_stack; index >= 0; index--)
+		next->ret_stack[index].calltime += timestamp;
+}
+
 /* Allocate a return stack for each task */
 static int start_graph_tracing(void)
 {
@@ -2627,6 +2650,13 @@ static int start_graph_tracing(void)
 		ret = alloc_retstack_tasklist(ret_stack_list);
 	} while (ret == -EAGAIN);

+	if (!ret) {
+		ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch);
+		if (ret)
+			pr_info("ftrace_graph: Couldn't activate tracepoint"
+				" probe to kernel_sched_switch\n");
+	}
+
 	kfree(ret_stack_list);
 	return ret;
 }
@@ -2659,6 +2689,12 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,

 	mutex_lock(&ftrace_lock);

+	/* we currently allow only one tracer registered at a time */
+	if (atomic_read(&ftrace_graph_active)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
 	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
 	register_pm_notifier(&ftrace_suspend_notifier);

@@ -2683,12 +2719,17 @@ void unregister_ftrace_graph(void)
 {
 	mutex_lock(&ftrace_lock);

+	if (!unlikely(atomic_read(&ftrace_graph_active)))
+		goto out;
+
 	atomic_dec(&ftrace_graph_active);
+	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
 	ftrace_shutdown(FTRACE_STOP_FUNC_RET);
 	unregister_pm_notifier(&ftrace_suspend_notifier);

+ out:
 	mutex_unlock(&ftrace_lock);
 }

2694 2735
@@ -2704,6 +2745,7 @@ void ftrace_graph_init_task(struct task_struct *t)
2704 t->curr_ret_stack = -1; 2745 t->curr_ret_stack = -1;
2705 atomic_set(&t->tracing_graph_pause, 0); 2746 atomic_set(&t->tracing_graph_pause, 0);
2706 atomic_set(&t->trace_overrun, 0); 2747 atomic_set(&t->trace_overrun, 0);
2748 t->ftrace_timestamp = 0;
2707 } else 2749 } else
2708 t->ret_stack = NULL; 2750 t->ret_stack = NULL;
2709} 2751}
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index ae201b3eda89..5011f4d91e37 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -6,14 +6,16 @@
  * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
  */

-#include <linux/dcache.h>
+#include <linux/tracepoint.h>
+#include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/dcache.h>
 #include <linux/fs.h>
-#include <linux/seq_file.h>
+
 #include <trace/kmemtrace.h>

-#include "trace.h"
 #include "trace_output.h"
+#include "trace.h"

 /* Select an alternative, minimalistic output than the original one */
 #define TRACE_KMEM_OPT_MINIMAL	0x1
@@ -25,14 +27,156 @@ static struct tracer_opt kmem_opts[] = {
 };

 static struct tracer_flags kmem_tracer_flags = {
 	.val = 0,
 	.opts = kmem_opts
 };

-
-static bool kmem_tracing_enabled __read_mostly;
 static struct trace_array *kmemtrace_array;

+/* Trace allocations */
+static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
+				   unsigned long call_site,
+				   const void *ptr,
+				   size_t bytes_req,
+				   size_t bytes_alloc,
+				   gfp_t gfp_flags,
+				   int node)
+{
+	struct trace_array *tr = kmemtrace_array;
+	struct kmemtrace_alloc_entry *entry;
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
+	if (!event)
+		return;
+
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+
+	entry->ent.type = TRACE_KMEM_ALLOC;
+	entry->type_id = type_id;
+	entry->call_site = call_site;
+	entry->ptr = ptr;
+	entry->bytes_req = bytes_req;
+	entry->bytes_alloc = bytes_alloc;
+	entry->gfp_flags = gfp_flags;
+	entry->node = node;
+
+	ring_buffer_unlock_commit(tr->buffer, event);
+
+	trace_wake_up();
+}
+
+static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
+				  unsigned long call_site,
+				  const void *ptr)
+{
+	struct trace_array *tr = kmemtrace_array;
+	struct kmemtrace_free_entry *entry;
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
+	if (!event)
+		return;
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+
+	entry->ent.type = TRACE_KMEM_FREE;
+	entry->type_id = type_id;
+	entry->call_site = call_site;
+	entry->ptr = ptr;
+
+	ring_buffer_unlock_commit(tr->buffer, event);
+
+	trace_wake_up();
+}
+
+static void kmemtrace_kmalloc(unsigned long call_site,
+			      const void *ptr,
+			      size_t bytes_req,
+			      size_t bytes_alloc,
+			      gfp_t gfp_flags)
+{
+	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
+			bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
+				       const void *ptr,
+				       size_t bytes_req,
+				       size_t bytes_alloc,
+				       gfp_t gfp_flags)
+{
+	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
+			bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+static void kmemtrace_kmalloc_node(unsigned long call_site,
+				   const void *ptr,
+				   size_t bytes_req,
+				   size_t bytes_alloc,
+				   gfp_t gfp_flags,
+				   int node)
+{
+	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
+			bytes_req, bytes_alloc, gfp_flags, node);
+}
+
+static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
+					    const void *ptr,
+					    size_t bytes_req,
+					    size_t bytes_alloc,
+					    gfp_t gfp_flags,
+					    int node)
+{
+	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
+			bytes_req, bytes_alloc, gfp_flags, node);
+}
+
+static void kmemtrace_kfree(unsigned long call_site, const void *ptr)
+{
+	kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
+}
+
+static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr)
+{
+	kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
+}
+
+static int kmemtrace_start_probes(void)
+{
+	int err;
+
+	err = register_trace_kmalloc(kmemtrace_kmalloc);
+	if (err)
+		return err;
+	err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
+	if (err)
+		return err;
+	err = register_trace_kmalloc_node(kmemtrace_kmalloc_node);
+	if (err)
+		return err;
+	err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
+	if (err)
+		return err;
+	err = register_trace_kfree(kmemtrace_kfree);
+	if (err)
+		return err;
+	err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
+
+	return err;
+}
+
+static void kmemtrace_stop_probes(void)
+{
+	unregister_trace_kmalloc(kmemtrace_kmalloc);
+	unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
+	unregister_trace_kmalloc_node(kmemtrace_kmalloc_node);
+	unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
+	unregister_trace_kfree(kmemtrace_kfree);
+	unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
+}
+
 static int kmem_trace_init(struct trace_array *tr)
 {
 	int cpu;
@@ -41,14 +185,14 @@ static int kmem_trace_init(struct trace_array *tr)
 	for_each_cpu_mask(cpu, cpu_possible_map)
 		tracing_reset(tr, cpu);

-	kmem_tracing_enabled = true;
+	kmemtrace_start_probes();

 	return 0;
 }

 static void kmem_trace_reset(struct trace_array *tr)
 {
-	kmem_tracing_enabled = false;
+	kmemtrace_stop_probes();
 }

 static void kmemtrace_headers(struct seq_file *s)
@@ -66,47 +210,84 @@ static void kmemtrace_headers(struct seq_file *s)
66} 210}
67 211
68/* 212/*
69 * The two following functions give the original output from kmemtrace, 213 * The following functions give the original output from kmemtrace,
70 * or something close to....perhaps they need some missing things 214 * plus the origin CPU, since reordering occurs in-kernel now.
71 */ 215 */
216
217#define KMEMTRACE_USER_ALLOC 0
218#define KMEMTRACE_USER_FREE 1
219
220struct kmemtrace_user_event {
221 u8 event_id;
222 u8 type_id;
223 u16 event_size;
224 u32 cpu;
225 u64 timestamp;
226 unsigned long call_site;
227 unsigned long ptr;
228};
229
230struct kmemtrace_user_event_alloc {
231 size_t bytes_req;
232 size_t bytes_alloc;
233 unsigned gfp_flags;
234 int node;
235};
236
72static enum print_line_t 237static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter, 238kmemtrace_print_alloc_user(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry) 239 struct kmemtrace_alloc_entry *entry)
75{ 240{
241 struct kmemtrace_user_event_alloc *ev_alloc;
76 struct trace_seq *s = &iter->seq; 242 struct trace_seq *s = &iter->seq;
77 int ret; 243 struct kmemtrace_user_event *ev;
244
245 ev = trace_seq_reserve(s, sizeof(*ev));
246 if (!ev)
247 return TRACE_TYPE_PARTIAL_LINE;
78 248
79 /* Taken from the old linux/kmemtrace.h */ 249 ev->event_id = KMEMTRACE_USER_ALLOC;
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " 250 ev->type_id = entry->type_id;
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", 251 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr, 252 ev->cpu = iter->cpu;
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, 253 ev->timestamp = iter->ts;
84 (unsigned long) entry->gfp_flags, entry->node); 254 ev->call_site = entry->call_site;
255 ev->ptr = (unsigned long)entry->ptr;
85 256
86 if (!ret) 257 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
258 if (!ev_alloc)
87 return TRACE_TYPE_PARTIAL_LINE; 259 return TRACE_TYPE_PARTIAL_LINE;
88 260
261 ev_alloc->bytes_req = entry->bytes_req;
262 ev_alloc->bytes_alloc = entry->bytes_alloc;
263 ev_alloc->gfp_flags = entry->gfp_flags;
264 ev_alloc->node = entry->node;
265
89 return TRACE_TYPE_HANDLED; 266 return TRACE_TYPE_HANDLED;
90} 267}
91 268
92static enum print_line_t 269static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter, 270kmemtrace_print_free_user(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry) 271 struct kmemtrace_free_entry *entry)
95{ 272{
96 struct trace_seq *s = &iter->seq; 273 struct trace_seq *s = &iter->seq;
97 int ret; 274 struct kmemtrace_user_event *ev;
98 275
99 /* Taken from the old linux/kmemtrace.h */ 276 ev = trace_seq_reserve(s, sizeof(*ev));
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", 277 if (!ev)
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE; 278 return TRACE_TYPE_PARTIAL_LINE;
105 279
280 ev->event_id = KMEMTRACE_USER_FREE;
281 ev->type_id = entry->type_id;
282 ev->event_size = sizeof(*ev);
283 ev->cpu = iter->cpu;
284 ev->timestamp = iter->ts;
285 ev->call_site = entry->call_site;
286 ev->ptr = (unsigned long)entry->ptr;
287
106 return TRACE_TYPE_HANDLED; 288 return TRACE_TYPE_HANDLED;
107} 289}
108 290
109
110/* The following two functions provide a more minimal output */ 291/* The following two functions provide a more minimal output */
111static enum print_line_t 292static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter, 293kmemtrace_print_alloc_compress(struct trace_iterator *iter,
@@ -178,7 +359,7 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
178 359
179static enum print_line_t 360static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter, 361kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry) 362 struct kmemtrace_free_entry *entry)
182{ 363{
183 struct trace_seq *s = &iter->seq; 364 struct trace_seq *s = &iter->seq;
184 int ret; 365 int ret;
@@ -239,20 +420,22 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
239 switch (entry->type) { 420 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: { 421 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field; 422 struct kmemtrace_alloc_entry *field;
423
242 trace_assign_type(field, entry); 424 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) 425 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field); 426 return kmemtrace_print_alloc_compress(iter, field);
245 else 427 else
246 return kmemtrace_print_alloc_original(iter, field); 428 return kmemtrace_print_alloc_user(iter, field);
247 } 429 }
248 430
249 case TRACE_KMEM_FREE: { 431 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field; 432 struct kmemtrace_free_entry *field;
433
251 trace_assign_type(field, entry); 434 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) 435 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field); 436 return kmemtrace_print_free_compress(iter, field);
254 else 437 else
255 return kmemtrace_print_free_original(iter, field); 438 return kmemtrace_print_free_user(iter, field);
256 } 439 }
257 440
258 default: 441 default:
@@ -260,70 +443,13 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
260 } 443 }
261} 444}
262 445
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
319
320static struct tracer kmem_tracer __read_mostly = { 446static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace", 447 .name = "kmemtrace",
322 .init = kmem_trace_init, 448 .init = kmem_trace_init,
323 .reset = kmem_trace_reset, 449 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line, 450 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers, 451 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags 452 .flags = &kmem_tracer_flags
327}; 453};
328 454
329void kmemtrace_init(void) 455void kmemtrace_init(void)
@@ -335,5 +461,4 @@ static int __init init_kmem_tracer(void)
335{ 461{
336 return register_tracer(&kmem_tracer); 462 return register_tracer(&kmem_tracer);
337} 463}
338
339device_initcall(init_kmem_tracer); 464device_initcall(init_kmem_tracer);
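kmemtrace_print_alloc_user() and kmemtrace_print_free_user() above emit packed binary records described by struct kmemtrace_user_event, optionally followed by struct kmemtrace_user_event_alloc, with event_size giving the total record length. A hedged userspace-side sketch of walking such a buffer; the field widths mirror the kernel structs, so a real reader must match the producing kernel's word size, and walk() itself is illustrative only:

/*
 * Userspace sketch (not part of this patch) of walking the records
 * emitted above.  Field widths mirror the kernel structs, so a real
 * reader must be built for the same architecture as the kernel.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct kmemtrace_user_event {
	uint8_t		event_id;	/* KMEMTRACE_USER_ALLOC or _FREE */
	uint8_t		type_id;
	uint16_t	event_size;	/* header plus optional alloc payload */
	uint32_t	cpu;
	uint64_t	timestamp;
	unsigned long	call_site;
	unsigned long	ptr;
};

static void walk(const char *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(struct kmemtrace_user_event) <= len) {
		const struct kmemtrace_user_event *ev =
				(const void *)(buf + off);

		printf("cpu=%u id=%u ptr=%#lx\n",
		       (unsigned)ev->cpu, (unsigned)ev->event_id, ev->ptr);
		if (ev->event_size < sizeof(*ev))
			break;			/* malformed record */
		off += ev->event_size;		/* skip the whole record */
	}
}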
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 58128ad2fde0..960cbf44c844 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -180,48 +180,74 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
180 180
181#include "trace.h" 181#include "trace.h"
182 182
183/* Up this if you want to test the TIME_EXTENTS and normalization */ 183#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
184#define DEBUG_SHIFT 0 184#define RB_ALIGNMENT 4U
185#define RB_MAX_SMALL_DATA 28
185 186
186u64 ring_buffer_time_stamp(int cpu) 187enum {
188 RB_LEN_TIME_EXTEND = 8,
189 RB_LEN_TIME_STAMP = 16,
190};
191
192static inline int rb_null_event(struct ring_buffer_event *event)
187{ 193{
188 u64 time; 194 return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
195}
189 196
190 preempt_disable_notrace(); 197static inline int rb_discarded_event(struct ring_buffer_event *event)
191 /* shift to debug/test normalization and TIME_EXTENTS */ 198{
192 time = trace_clock_local() << DEBUG_SHIFT; 199 return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
193 preempt_enable_no_resched_notrace(); 200}
194 201
195 return time; 202static void rb_event_set_padding(struct ring_buffer_event *event)
203{
204 event->type = RINGBUF_TYPE_PADDING;
205 event->time_delta = 0;
196} 206}
197EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
198 207
199void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 208/**
209 * ring_buffer_event_discard - discard an event in the ring buffer
210 * @buffer: the ring buffer
211 * @event: the event to discard
212 *
213 * Sometimes an event that is in the ring buffer needs to be ignored.
214 * This function lets the user discard an event in the ring buffer
215 * and then that event will not be read later.
216 *
217 * Note, it is up to the user to be careful with this, and protect
218 * against races. If the user discards an event that has been consumed
219 * it is possible that it could corrupt the ring buffer.
220 */
221void ring_buffer_event_discard(struct ring_buffer_event *event)
200{ 222{
201 /* Just stupid testing the normalize function and deltas */ 223 event->type = RINGBUF_TYPE_PADDING;
202 *ts >>= DEBUG_SHIFT; 224 /* time delta must be non zero */
225 if (!event->time_delta)
226 event->time_delta = 1;
203} 227}
204EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
205 228
206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 229static unsigned
207#define RB_ALIGNMENT 4U 230rb_event_data_length(struct ring_buffer_event *event)
208#define RB_MAX_SMALL_DATA 28 231{
232 unsigned length;
209 233
210enum { 234 if (event->len)
211 RB_LEN_TIME_EXTEND = 8, 235 length = event->len * RB_ALIGNMENT;
212 RB_LEN_TIME_STAMP = 16, 236 else
213}; 237 length = event->array[0];
238 return length + RB_EVNT_HDR_SIZE;
239}
214 240
215/* inline for ring buffer fast paths */ 241/* inline for ring buffer fast paths */
216static unsigned 242static unsigned
217rb_event_length(struct ring_buffer_event *event) 243rb_event_length(struct ring_buffer_event *event)
218{ 244{
219 unsigned length;
220
221 switch (event->type) { 245 switch (event->type) {
222 case RINGBUF_TYPE_PADDING: 246 case RINGBUF_TYPE_PADDING:
223 /* undefined */ 247 if (rb_null_event(event))
224 return -1; 248 /* undefined */
249 return -1;
250 return rb_event_data_length(event);
225 251
226 case RINGBUF_TYPE_TIME_EXTEND: 252 case RINGBUF_TYPE_TIME_EXTEND:
227 return RB_LEN_TIME_EXTEND; 253 return RB_LEN_TIME_EXTEND;
@@ -230,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event)
230 return RB_LEN_TIME_STAMP; 256 return RB_LEN_TIME_STAMP;
231 257
232 case RINGBUF_TYPE_DATA: 258 case RINGBUF_TYPE_DATA:
233 if (event->len) 259 return rb_event_data_length(event);
234 length = event->len * RB_ALIGNMENT;
235 else
236 length = event->array[0];
237 return length + RB_EVNT_HDR_SIZE;
238 default: 260 default:
239 BUG(); 261 BUG();
240 } 262 }
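ring_buffer_event_discard() above turns an already-reserved event into non-null padding that readers skip. A minimal sketch of the intended write-side pattern, assuming the caller still commits the reserved space afterwards; struct my_entry, TRACE_MY_TYPE and should_drop() are hypothetical, while the reserve/commit helpers are the ones used elsewhere in this series:

/*
 * Sketch: struct my_entry, TRACE_MY_TYPE and should_drop() are
 * hypothetical; the reserve/commit helpers are real.
 */
static void record_maybe(struct trace_array *tr, unsigned long data)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;

	event = trace_buffer_lock_reserve(tr, TRACE_MY_TYPE,
					  sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->data = data;

	if (should_drop(entry))
		/* becomes non-null padding; readers will skip it */
		ring_buffer_event_discard(event);

	/* the (possibly discarded) event is still committed */
	trace_buffer_unlock_commit(tr, event, 0, 0);
}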
@@ -374,6 +396,7 @@ struct ring_buffer {
374#ifdef CONFIG_HOTPLUG_CPU 396#ifdef CONFIG_HOTPLUG_CPU
375 struct notifier_block cpu_notify; 397 struct notifier_block cpu_notify;
376#endif 398#endif
399 u64 (*clock)(void);
377}; 400};
378 401
379struct ring_buffer_iter { 402struct ring_buffer_iter {
@@ -394,6 +417,30 @@ struct ring_buffer_iter {
394 _____ret; \ 417 _____ret; \
395 }) 418 })
396 419
420/* Up this if you want to test the TIME_EXTENTS and normalization */
421#define DEBUG_SHIFT 0
422
423u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
424{
425 u64 time;
426
427 preempt_disable_notrace();
428 /* shift to debug/test normalization and TIME_EXTENTS */
429 time = buffer->clock() << DEBUG_SHIFT;
430 preempt_enable_no_resched_notrace();
431
432 return time;
433}
434EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
435
436void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
437 int cpu, u64 *ts)
438{
439 /* Just stupid testing the normalize function and deltas */
440 *ts >>= DEBUG_SHIFT;
441}
442EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
443
397/** 444/**
398 * check_pages - integrity check of buffer pages 445 * check_pages - integrity check of buffer pages
399 * @cpu_buffer: CPU buffer with pages to test 446 * @cpu_buffer: CPU buffer with pages to test
@@ -516,7 +563,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
516 struct list_head *head = &cpu_buffer->pages; 563 struct list_head *head = &cpu_buffer->pages;
517 struct buffer_page *bpage, *tmp; 564 struct buffer_page *bpage, *tmp;
518 565
519 list_del_init(&cpu_buffer->reader_page->list);
520 free_buffer_page(cpu_buffer->reader_page); 566 free_buffer_page(cpu_buffer->reader_page);
521 567
522 list_for_each_entry_safe(bpage, tmp, head, list) { 568 list_for_each_entry_safe(bpage, tmp, head, list) {
@@ -533,8 +579,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
533extern int ring_buffer_page_too_big(void); 579extern int ring_buffer_page_too_big(void);
534 580
535#ifdef CONFIG_HOTPLUG_CPU 581#ifdef CONFIG_HOTPLUG_CPU
536static int __cpuinit rb_cpu_notify(struct notifier_block *self, 582static int rb_cpu_notify(struct notifier_block *self,
537 unsigned long action, void *hcpu); 583 unsigned long action, void *hcpu);
538#endif 584#endif
539 585
540/** 586/**
@@ -569,13 +615,23 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
569 615
570 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 616 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
571 buffer->flags = flags; 617 buffer->flags = flags;
618 buffer->clock = trace_clock_local;
572 619
573 /* need at least two pages */ 620 /* need at least two pages */
574 if (buffer->pages == 1) 621 if (buffer->pages == 1)
575 buffer->pages++; 622 buffer->pages++;
576 623
624 /*
625 * In case of non-hotplug cpu, if the ring-buffer is allocated
626 * in early initcall, it will not be notified of secondary cpus.
627 * In that case, we need to allocate for all possible cpus.
628 */
629#ifdef CONFIG_HOTPLUG_CPU
577 get_online_cpus(); 630 get_online_cpus();
578 cpumask_copy(buffer->cpumask, cpu_online_mask); 631 cpumask_copy(buffer->cpumask, cpu_online_mask);
632#else
633 cpumask_copy(buffer->cpumask, cpu_possible_mask);
634#endif
579 buffer->cpus = nr_cpu_ids; 635 buffer->cpus = nr_cpu_ids;
580 636
581 bsize = sizeof(void *) * nr_cpu_ids; 637 bsize = sizeof(void *) * nr_cpu_ids;
@@ -645,6 +701,12 @@ ring_buffer_free(struct ring_buffer *buffer)
645} 701}
646EXPORT_SYMBOL_GPL(ring_buffer_free); 702EXPORT_SYMBOL_GPL(ring_buffer_free);
647 703
704void ring_buffer_set_clock(struct ring_buffer *buffer,
705 u64 (*clock)(void))
706{
707 buffer->clock = clock;
708}
709
648static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 710static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
649 711
650static void 712static void
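With the timestamp source now a per-buffer function pointer, ring_buffer_set_clock() lets a tracer swap clocks at runtime; the "global-clock" option added to trace.c later in this series does exactly this. A hedged sketch:

#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>

/* Sketch: pick the timestamp source for an existing buffer. */
static void use_global_clock(struct ring_buffer *buffer, bool global)
{
	ring_buffer_set_clock(buffer, global ? trace_clock_global
					     : trace_clock_local);
}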
@@ -827,11 +889,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
827} 889}
828EXPORT_SYMBOL_GPL(ring_buffer_resize); 890EXPORT_SYMBOL_GPL(ring_buffer_resize);
829 891
830static inline int rb_null_event(struct ring_buffer_event *event)
831{
832 return event->type == RINGBUF_TYPE_PADDING;
833}
834
835static inline void * 892static inline void *
836__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 893__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
837{ 894{
@@ -1191,7 +1248,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1191 cpu_buffer->tail_page = next_page; 1248 cpu_buffer->tail_page = next_page;
1192 1249
1193 /* reread the time stamp */ 1250 /* reread the time stamp */
1194 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1251 *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
1195 cpu_buffer->tail_page->page->time_stamp = *ts; 1252 cpu_buffer->tail_page->page->time_stamp = *ts;
1196 } 1253 }
1197 1254
@@ -1201,7 +1258,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1201 if (tail < BUF_PAGE_SIZE) { 1258 if (tail < BUF_PAGE_SIZE) {
1202 /* Mark the rest of the page with padding */ 1259 /* Mark the rest of the page with padding */
1203 event = __rb_page_index(tail_page, tail); 1260 event = __rb_page_index(tail_page, tail);
1204 event->type = RINGBUF_TYPE_PADDING; 1261 rb_event_set_padding(event);
1205 } 1262 }
1206 1263
1207 if (tail <= BUF_PAGE_SIZE) 1264 if (tail <= BUF_PAGE_SIZE)
@@ -1334,7 +1391,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1334 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 1391 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1335 return NULL; 1392 return NULL;
1336 1393
1337 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1394 ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
1338 1395
1339 /* 1396 /*
1340 * Only the first commit can update the timestamp. 1397 * Only the first commit can update the timestamp.
@@ -1951,7 +2008,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1951 2008
1952 event = rb_reader_event(cpu_buffer); 2009 event = rb_reader_event(cpu_buffer);
1953 2010
1954 if (event->type == RINGBUF_TYPE_DATA) 2011 if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
1955 cpu_buffer->entries--; 2012 cpu_buffer->entries--;
1956 2013
1957 rb_update_read_stamp(cpu_buffer, event); 2014 rb_update_read_stamp(cpu_buffer, event);
@@ -2034,9 +2091,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2034 2091
2035 switch (event->type) { 2092 switch (event->type) {
2036 case RINGBUF_TYPE_PADDING: 2093 case RINGBUF_TYPE_PADDING:
2037 RB_WARN_ON(cpu_buffer, 1); 2094 if (rb_null_event(event))
2095 RB_WARN_ON(cpu_buffer, 1);
2096 /*
2097 * Because the writer could be discarding every
2098 * event it creates (which would probably be bad)
2099 * if we were to go back to "again" then we may never
2100 * catch up, and will trigger the warn on, or lock
2101 * the box. Return the padding, and we will release
2102 * the current locks, and try again.
2103 */
2038 rb_advance_reader(cpu_buffer); 2104 rb_advance_reader(cpu_buffer);
2039 return NULL; 2105 return event;
2040 2106
2041 case RINGBUF_TYPE_TIME_EXTEND: 2107 case RINGBUF_TYPE_TIME_EXTEND:
2042 /* Internal data, OK to advance */ 2108 /* Internal data, OK to advance */
@@ -2051,7 +2117,8 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2051 case RINGBUF_TYPE_DATA: 2117 case RINGBUF_TYPE_DATA:
2052 if (ts) { 2118 if (ts) {
2053 *ts = cpu_buffer->read_stamp + event->time_delta; 2119 *ts = cpu_buffer->read_stamp + event->time_delta;
2054 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); 2120 ring_buffer_normalize_time_stamp(buffer,
2121 cpu_buffer->cpu, ts);
2055 } 2122 }
2056 return event; 2123 return event;
2057 2124
@@ -2096,8 +2163,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2096 2163
2097 switch (event->type) { 2164 switch (event->type) {
2098 case RINGBUF_TYPE_PADDING: 2165 case RINGBUF_TYPE_PADDING:
2099 rb_inc_iter(iter); 2166 if (rb_null_event(event)) {
2100 goto again; 2167 rb_inc_iter(iter);
2168 goto again;
2169 }
2170 rb_advance_iter(iter);
2171 return event;
2101 2172
2102 case RINGBUF_TYPE_TIME_EXTEND: 2173 case RINGBUF_TYPE_TIME_EXTEND:
2103 /* Internal data, OK to advance */ 2174 /* Internal data, OK to advance */
@@ -2112,7 +2183,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2112 case RINGBUF_TYPE_DATA: 2183 case RINGBUF_TYPE_DATA:
2113 if (ts) { 2184 if (ts) {
2114 *ts = iter->read_stamp + event->time_delta; 2185 *ts = iter->read_stamp + event->time_delta;
2115 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); 2186 ring_buffer_normalize_time_stamp(buffer,
2187 cpu_buffer->cpu, ts);
2116 } 2188 }
2117 return event; 2189 return event;
2118 2190
@@ -2143,10 +2215,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2143 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2215 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2144 return NULL; 2216 return NULL;
2145 2217
2218 again:
2146 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2219 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2147 event = rb_buffer_peek(buffer, cpu, ts); 2220 event = rb_buffer_peek(buffer, cpu, ts);
2148 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2221 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2149 2222
2223 if (event && event->type == RINGBUF_TYPE_PADDING) {
2224 cpu_relax();
2225 goto again;
2226 }
2227
2150 return event; 2228 return event;
2151} 2229}
2152 2230
@@ -2165,10 +2243,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2165 struct ring_buffer_event *event; 2243 struct ring_buffer_event *event;
2166 unsigned long flags; 2244 unsigned long flags;
2167 2245
2246 again:
2168 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2247 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2169 event = rb_iter_peek(iter, ts); 2248 event = rb_iter_peek(iter, ts);
2170 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2249 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2171 2250
2251 if (event && event->type == RINGBUF_TYPE_PADDING) {
2252 cpu_relax();
2253 goto again;
2254 }
2255
2172 return event; 2256 return event;
2173} 2257}
2174 2258
@@ -2187,6 +2271,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2187 struct ring_buffer_event *event = NULL; 2271 struct ring_buffer_event *event = NULL;
2188 unsigned long flags; 2272 unsigned long flags;
2189 2273
2274 again:
2190 /* might be called in atomic */ 2275 /* might be called in atomic */
2191 preempt_disable(); 2276 preempt_disable();
2192 2277
@@ -2208,6 +2293,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2208 out: 2293 out:
2209 preempt_enable(); 2294 preempt_enable();
2210 2295
2296 if (event && event->type == RINGBUF_TYPE_PADDING) {
2297 cpu_relax();
2298 goto again;
2299 }
2300
2211 return event; 2301 return event;
2212} 2302}
2213EXPORT_SYMBOL_GPL(ring_buffer_consume); 2303EXPORT_SYMBOL_GPL(ring_buffer_consume);
@@ -2286,6 +2376,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2286 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 2376 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2287 unsigned long flags; 2377 unsigned long flags;
2288 2378
2379 again:
2289 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2380 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2290 event = rb_iter_peek(iter, ts); 2381 event = rb_iter_peek(iter, ts);
2291 if (!event) 2382 if (!event)
@@ -2295,6 +2386,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2295 out: 2386 out:
2296 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2387 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2297 2388
2389 if (event && event->type == RINGBUF_TYPE_PADDING) {
2390 cpu_relax();
2391 goto again;
2392 }
2393
2298 return event; 2394 return event;
2299} 2395}
2300EXPORT_SYMBOL_GPL(ring_buffer_read); 2396EXPORT_SYMBOL_GPL(ring_buffer_read);
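The cpu_relax()/goto again loops added above keep discarded (padding) events internal to the ring buffer, so consumers still only ever see real data events. A hedged sketch of a per-CPU drain loop built on ring_buffer_consume(); process_record() is hypothetical:

/* Sketch: drain everything currently readable on one CPU. */
static void drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
		void *data = ring_buffer_event_data(event);

		/* ts is already normalized for this buffer's clock */
		process_record(data, ts);	/* hypothetical consumer */
	}
}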
@@ -2764,8 +2860,8 @@ static __init int rb_init_debugfs(void)
2764fs_initcall(rb_init_debugfs); 2860fs_initcall(rb_init_debugfs);
2765 2861
2766#ifdef CONFIG_HOTPLUG_CPU 2862#ifdef CONFIG_HOTPLUG_CPU
2767static int __cpuinit rb_cpu_notify(struct notifier_block *self, 2863static int rb_cpu_notify(struct notifier_block *self,
2768 unsigned long action, void *hcpu) 2864 unsigned long action, void *hcpu)
2769{ 2865{
2770 struct ring_buffer *buffer = 2866 struct ring_buffer *buffer =
2771 container_of(self, struct ring_buffer, cpu_notify); 2867 container_of(self, struct ring_buffer, cpu_notify);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index efe3202c0209..a0174a40c563 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -155,13 +155,6 @@ ns2usecs(cycle_t nsec)
155 return nsec; 155 return nsec;
156} 156}
157 157
158cycle_t ftrace_now(int cpu)
159{
160 u64 ts = ring_buffer_time_stamp(cpu);
161 ring_buffer_normalize_time_stamp(cpu, &ts);
162 return ts;
163}
164
165/* 158/*
166 * The global_trace is the descriptor that holds the tracing 159 * The global_trace is the descriptor that holds the tracing
167 * buffers for the live tracing. For each CPU, it contains 160 * buffers for the live tracing. For each CPU, it contains
@@ -178,6 +171,20 @@ static struct trace_array global_trace;
178 171
179static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
180 173
174cycle_t ftrace_now(int cpu)
175{
176 u64 ts;
177
178 /* Early boot up does not have a buffer yet */
179 if (!global_trace.buffer)
180 return trace_clock_local();
181
182 ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
183 ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
184
185 return ts;
186}
187
181/* 188/*
182 * The max_tr is used to snapshot the global_trace when a maximum 189 * The max_tr is used to snapshot the global_trace when a maximum
183 * latency is reached. Some tracers will use this to store a maximum 190 * latency is reached. Some tracers will use this to store a maximum
@@ -248,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
248 255
249/* trace_flags holds trace_options default values */ 256/* trace_flags holds trace_options default values */
250unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 257unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
251 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; 258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
252 259
253/** 260/**
254 * trace_wake_up - wake up tasks waiting for trace input 261 * trace_wake_up - wake up tasks waiting for trace input
@@ -308,6 +315,8 @@ static const char *trace_options[] = {
308 "printk-msg-only", 315 "printk-msg-only",
309 "context-info", 316 "context-info",
310 "latency-format", 317 "latency-format",
318 "global-clock",
319 "sleep-time",
311 NULL 320 NULL
312}; 321};
313 322
@@ -374,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
374 return cnt; 383 return cnt;
375} 384}
376 385
377ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 386static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
378{ 387{
379 int len; 388 int len;
380 void *ret; 389 void *ret;
@@ -633,6 +642,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
633} 642}
634 643
635#define SAVED_CMDLINES 128 644#define SAVED_CMDLINES 128
645#define NO_CMDLINE_MAP UINT_MAX
636static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; 646static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
637static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 647static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
638static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 648static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
@@ -644,8 +654,8 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
644 654
645static void trace_init_cmdlines(void) 655static void trace_init_cmdlines(void)
646{ 656{
647 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline)); 657 memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
648 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid)); 658 memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
649 cmdline_idx = 0; 659 cmdline_idx = 0;
650} 660}
651 661
@@ -737,8 +747,7 @@ void trace_stop_cmdline_recording(void);
737 747
738static void trace_save_cmdline(struct task_struct *tsk) 748static void trace_save_cmdline(struct task_struct *tsk)
739{ 749{
740 unsigned map; 750 unsigned pid, idx;
741 unsigned idx;
742 751
743 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) 752 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
744 return; 753 return;
@@ -753,13 +762,20 @@ static void trace_save_cmdline(struct task_struct *tsk)
753 return; 762 return;
754 763
755 idx = map_pid_to_cmdline[tsk->pid]; 764 idx = map_pid_to_cmdline[tsk->pid];
756 if (idx >= SAVED_CMDLINES) { 765 if (idx == NO_CMDLINE_MAP) {
757 idx = (cmdline_idx + 1) % SAVED_CMDLINES; 766 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
758 767
759 map = map_cmdline_to_pid[idx]; 768 /*
760 if (map <= PID_MAX_DEFAULT) 769 * Check whether the cmdline buffer at idx has a pid
761 map_pid_to_cmdline[map] = (unsigned)-1; 770 * mapped. We are going to overwrite that entry so we
771 * need to clear the map_pid_to_cmdline. Otherwise we
772 * would read the new comm for the old pid.
773 */
774 pid = map_cmdline_to_pid[idx];
775 if (pid != NO_CMDLINE_MAP)
776 map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
762 777
778 map_cmdline_to_pid[idx] = tsk->pid;
763 map_pid_to_cmdline[tsk->pid] = idx; 779 map_pid_to_cmdline[tsk->pid] = idx;
764 780
765 cmdline_idx = idx; 781 cmdline_idx = idx;
@@ -770,30 +786,34 @@ static void trace_save_cmdline(struct task_struct *tsk)
770 __raw_spin_unlock(&trace_cmdline_lock); 786 __raw_spin_unlock(&trace_cmdline_lock);
771} 787}
772 788
773char *trace_find_cmdline(int pid) 789void trace_find_cmdline(int pid, char comm[])
774{ 790{
775 char *cmdline = "<...>";
776 unsigned map; 791 unsigned map;
777 792
778 if (!pid) 793 if (!pid) {
779 return "<idle>"; 794 strcpy(comm, "<idle>");
795 return;
796 }
780 797
781 if (pid > PID_MAX_DEFAULT) 798 if (pid > PID_MAX_DEFAULT) {
782 goto out; 799 strcpy(comm, "<...>");
800 return;
801 }
783 802
803 __raw_spin_lock(&trace_cmdline_lock);
784 map = map_pid_to_cmdline[pid]; 804 map = map_pid_to_cmdline[pid];
785 if (map >= SAVED_CMDLINES) 805 if (map != NO_CMDLINE_MAP)
786 goto out; 806 strcpy(comm, saved_cmdlines[map]);
787 807 else
788 cmdline = saved_cmdlines[map]; 808 strcpy(comm, "<...>");
789 809
790 out: 810 __raw_spin_unlock(&trace_cmdline_lock);
791 return cmdline;
792} 811}
793 812
794void tracing_record_cmdline(struct task_struct *tsk) 813void tracing_record_cmdline(struct task_struct *tsk)
795{ 814{
796 if (atomic_read(&trace_record_cmdline_disabled)) 815 if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
816 !tracing_is_on())
797 return; 817 return;
798 818
799 trace_save_cmdline(tsk); 819 trace_save_cmdline(tsk);
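trace_find_cmdline() no longer hands back a pointer into the shared saved_cmdlines table; callers now supply their own buffer, which is filled under trace_cmdline_lock. A hedged sketch of the updated calling convention (print_comm_for() is illustrative only):

#include <linux/kernel.h>
#include <linux/sched.h>	/* TASK_COMM_LEN */

/* Sketch: resolve a pid recorded in a trace entry to a command name. */
static void print_comm_for(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);	/* fills comm, "<...>" if unknown */
	pr_info("pid %d comm %s\n", pid, comm);
}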
@@ -841,15 +861,25 @@ static void ftrace_trace_stack(struct trace_array *tr,
841static void ftrace_trace_userstack(struct trace_array *tr, 861static void ftrace_trace_userstack(struct trace_array *tr,
842 unsigned long flags, int pc); 862 unsigned long flags, int pc);
843 863
844void trace_buffer_unlock_commit(struct trace_array *tr, 864static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
845 struct ring_buffer_event *event, 865 struct ring_buffer_event *event,
846 unsigned long flags, int pc) 866 unsigned long flags, int pc,
867 int wake)
847{ 868{
848 ring_buffer_unlock_commit(tr->buffer, event); 869 ring_buffer_unlock_commit(tr->buffer, event);
849 870
850 ftrace_trace_stack(tr, flags, 6, pc); 871 ftrace_trace_stack(tr, flags, 6, pc);
851 ftrace_trace_userstack(tr, flags, pc); 872 ftrace_trace_userstack(tr, flags, pc);
852 trace_wake_up(); 873
874 if (wake)
875 trace_wake_up();
876}
877
878void trace_buffer_unlock_commit(struct trace_array *tr,
879 struct ring_buffer_event *event,
880 unsigned long flags, int pc)
881{
882 __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
853} 883}
854 884
855struct ring_buffer_event * 885struct ring_buffer_event *
@@ -863,7 +893,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
863void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 893void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
864 unsigned long flags, int pc) 894 unsigned long flags, int pc)
865{ 895{
866 return trace_buffer_unlock_commit(&global_trace, event, flags, pc); 896 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
897}
898
899void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
900 unsigned long flags, int pc)
901{
902 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
867} 903}
868 904
869void 905void
@@ -889,7 +925,7 @@ trace_function(struct trace_array *tr,
889} 925}
890 926
891#ifdef CONFIG_FUNCTION_GRAPH_TRACER 927#ifdef CONFIG_FUNCTION_GRAPH_TRACER
892static void __trace_graph_entry(struct trace_array *tr, 928static int __trace_graph_entry(struct trace_array *tr,
893 struct ftrace_graph_ent *trace, 929 struct ftrace_graph_ent *trace,
894 unsigned long flags, 930 unsigned long flags,
895 int pc) 931 int pc)
@@ -898,15 +934,17 @@ static void __trace_graph_entry(struct trace_array *tr,
898 struct ftrace_graph_ent_entry *entry; 934 struct ftrace_graph_ent_entry *entry;
899 935
900 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 936 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
901 return; 937 return 0;
902 938
903 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, 939 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
904 sizeof(*entry), flags, pc); 940 sizeof(*entry), flags, pc);
905 if (!event) 941 if (!event)
906 return; 942 return 0;
907 entry = ring_buffer_event_data(event); 943 entry = ring_buffer_event_data(event);
908 entry->graph_ent = *trace; 944 entry->graph_ent = *trace;
909 ring_buffer_unlock_commit(global_trace.buffer, event); 945 ring_buffer_unlock_commit(global_trace.buffer, event);
946
947 return 1;
910} 948}
911 949
912static void __trace_graph_return(struct trace_array *tr, 950static void __trace_graph_return(struct trace_array *tr,
@@ -1127,6 +1165,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1127 struct trace_array_cpu *data; 1165 struct trace_array_cpu *data;
1128 unsigned long flags; 1166 unsigned long flags;
1129 long disabled; 1167 long disabled;
1168 int ret;
1130 int cpu; 1169 int cpu;
1131 int pc; 1170 int pc;
1132 1171
@@ -1142,15 +1181,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1142 disabled = atomic_inc_return(&data->disabled); 1181 disabled = atomic_inc_return(&data->disabled);
1143 if (likely(disabled == 1)) { 1182 if (likely(disabled == 1)) {
1144 pc = preempt_count(); 1183 pc = preempt_count();
1145 __trace_graph_entry(tr, trace, flags, pc); 1184 ret = __trace_graph_entry(tr, trace, flags, pc);
1185 } else {
1186 ret = 0;
1146 } 1187 }
1147 /* Only do the atomic if it is not already set */ 1188 /* Only do the atomic if it is not already set */
1148 if (!test_tsk_trace_graph(current)) 1189 if (!test_tsk_trace_graph(current))
1149 set_tsk_trace_graph(current); 1190 set_tsk_trace_graph(current);
1191
1150 atomic_dec(&data->disabled); 1192 atomic_dec(&data->disabled);
1151 local_irq_restore(flags); 1193 local_irq_restore(flags);
1152 1194
1153 return 1; 1195 return ret;
1154} 1196}
1155 1197
1156void trace_graph_return(struct ftrace_graph_ret *trace) 1198void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -1182,7 +1224,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1182 * trace_vbprintk - write binary msg to tracing buffer 1224 * trace_vbprintk - write binary msg to tracing buffer
1183 * 1225 *
1184 */ 1226 */
1185int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) 1227int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1186{ 1228{
1187 static raw_spinlock_t trace_buf_lock = 1229 static raw_spinlock_t trace_buf_lock =
1188 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1230 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
@@ -1224,7 +1266,6 @@ int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
1224 goto out_unlock; 1266 goto out_unlock;
1225 entry = ring_buffer_event_data(event); 1267 entry = ring_buffer_event_data(event);
1226 entry->ip = ip; 1268 entry->ip = ip;
1227 entry->depth = depth;
1228 entry->fmt = fmt; 1269 entry->fmt = fmt;
1229 1270
1230 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1271 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
@@ -1242,7 +1283,7 @@ out:
1242} 1283}
1243EXPORT_SYMBOL_GPL(trace_vbprintk); 1284EXPORT_SYMBOL_GPL(trace_vbprintk);
1244 1285
1245int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) 1286int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1246{ 1287{
1247 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1288 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1248 static char trace_buf[TRACE_BUF_SIZE]; 1289 static char trace_buf[TRACE_BUF_SIZE];
@@ -1279,7 +1320,6 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
1279 goto out_unlock; 1320 goto out_unlock;
1280 entry = ring_buffer_event_data(event); 1321 entry = ring_buffer_event_data(event);
1281 entry->ip = ip; 1322 entry->ip = ip;
1282 entry->depth = depth;
1283 1323
1284 memcpy(&entry->buf, trace_buf, len); 1324 memcpy(&entry->buf, trace_buf, len);
1285 entry->buf[len] = 0; 1325 entry->buf[len] = 0;
@@ -1682,38 +1722,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1682 return TRACE_TYPE_HANDLED; 1722 return TRACE_TYPE_HANDLED;
1683} 1723}
1684 1724
1685static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter)
1686{
1687 struct trace_seq *s = &iter->seq;
1688 struct trace_entry *entry = iter->ent;
1689 struct bprint_entry *field;
1690 int ret;
1691
1692 trace_assign_type(field, entry);
1693
1694 ret = trace_seq_bprintf(s, field->fmt, field->buf);
1695 if (!ret)
1696 return TRACE_TYPE_PARTIAL_LINE;
1697
1698 return TRACE_TYPE_HANDLED;
1699}
1700
1701static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
1702{
1703 struct trace_seq *s = &iter->seq;
1704 struct trace_entry *entry = iter->ent;
1705 struct print_entry *field;
1706 int ret;
1707
1708 trace_assign_type(field, entry);
1709
1710 ret = trace_seq_printf(s, "%s", field->buf);
1711 if (!ret)
1712 return TRACE_TYPE_PARTIAL_LINE;
1713
1714 return TRACE_TYPE_HANDLED;
1715}
1716
1717static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 1725static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1718{ 1726{
1719 struct trace_seq *s = &iter->seq; 1727 struct trace_seq *s = &iter->seq;
@@ -1775,12 +1783,12 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1775 if (iter->ent->type == TRACE_BPRINT && 1783 if (iter->ent->type == TRACE_BPRINT &&
1776 trace_flags & TRACE_ITER_PRINTK && 1784 trace_flags & TRACE_ITER_PRINTK &&
1777 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 1785 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1778 return print_bprintk_msg_only(iter); 1786 return trace_print_bprintk_msg_only(iter);
1779 1787
1780 if (iter->ent->type == TRACE_PRINT && 1788 if (iter->ent->type == TRACE_PRINT &&
1781 trace_flags & TRACE_ITER_PRINTK && 1789 trace_flags & TRACE_ITER_PRINTK &&
1782 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 1790 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1783 return print_printk_msg_only(iter); 1791 return trace_print_printk_msg_only(iter);
1784 1792
1785 if (trace_flags & TRACE_ITER_BIN) 1793 if (trace_flags & TRACE_ITER_BIN)
1786 return print_bin_fmt(iter); 1794 return print_bin_fmt(iter);
@@ -1929,9 +1937,14 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
1929static int tracing_release(struct inode *inode, struct file *file) 1937static int tracing_release(struct inode *inode, struct file *file)
1930{ 1938{
1931 struct seq_file *m = (struct seq_file *)file->private_data; 1939 struct seq_file *m = (struct seq_file *)file->private_data;
1932 struct trace_iterator *iter = m->private; 1940 struct trace_iterator *iter;
1933 int cpu; 1941 int cpu;
1934 1942
1943 if (!(file->f_mode & FMODE_READ))
1944 return 0;
1945
1946 iter = m->private;
1947
1935 mutex_lock(&trace_types_lock); 1948 mutex_lock(&trace_types_lock);
1936 for_each_tracing_cpu(cpu) { 1949 for_each_tracing_cpu(cpu) {
1937 if (iter->buffer_iter[cpu]) 1950 if (iter->buffer_iter[cpu])
@@ -1957,12 +1970,24 @@ static int tracing_open(struct inode *inode, struct file *file)
1957 struct trace_iterator *iter; 1970 struct trace_iterator *iter;
1958 int ret = 0; 1971 int ret = 0;
1959 1972
1960 iter = __tracing_open(inode, file); 1973 /* If this file was open for write, then erase contents */
1961 if (IS_ERR(iter)) 1974 if ((file->f_mode & FMODE_WRITE) &&
1962 ret = PTR_ERR(iter); 1975 !(file->f_flags & O_APPEND)) {
1963 else if (trace_flags & TRACE_ITER_LATENCY_FMT) 1976 long cpu = (long) inode->i_private;
1964 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1965 1977
1978 if (cpu == TRACE_PIPE_ALL_CPU)
1979 tracing_reset_online_cpus(&global_trace);
1980 else
1981 tracing_reset(&global_trace, cpu);
1982 }
1983
1984 if (file->f_mode & FMODE_READ) {
1985 iter = __tracing_open(inode, file);
1986 if (IS_ERR(iter))
1987 ret = PTR_ERR(iter);
1988 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
1989 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1990 }
1966 return ret; 1991 return ret;
1967} 1992}
1968 1993
@@ -2037,9 +2062,17 @@ static int show_traces_open(struct inode *inode, struct file *file)
2037 return ret; 2062 return ret;
2038} 2063}
2039 2064
2065static ssize_t
2066tracing_write_stub(struct file *filp, const char __user *ubuf,
2067 size_t count, loff_t *ppos)
2068{
2069 return count;
2070}
2071
2040static const struct file_operations tracing_fops = { 2072static const struct file_operations tracing_fops = {
2041 .open = tracing_open, 2073 .open = tracing_open,
2042 .read = seq_read, 2074 .read = seq_read,
2075 .write = tracing_write_stub,
2043 .llseek = seq_lseek, 2076 .llseek = seq_lseek,
2044 .release = tracing_release, 2077 .release = tracing_release,
2045}; 2078};
@@ -2240,6 +2273,34 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2240 return 0; 2273 return 0;
2241} 2274}
2242 2275
2276static void set_tracer_flags(unsigned int mask, int enabled)
2277{
2278 /* do nothing if flag is already set */
2279 if (!!(trace_flags & mask) == !!enabled)
2280 return;
2281
2282 if (enabled)
2283 trace_flags |= mask;
2284 else
2285 trace_flags &= ~mask;
2286
2287 if (mask == TRACE_ITER_GLOBAL_CLK) {
2288 u64 (*func)(void);
2289
2290 if (enabled)
2291 func = trace_clock_global;
2292 else
2293 func = trace_clock_local;
2294
2295 mutex_lock(&trace_types_lock);
2296 ring_buffer_set_clock(global_trace.buffer, func);
2297
2298 if (max_tr.buffer)
2299 ring_buffer_set_clock(max_tr.buffer, func);
2300 mutex_unlock(&trace_types_lock);
2301 }
2302}
2303
2243static ssize_t 2304static ssize_t
2244tracing_trace_options_write(struct file *filp, const char __user *ubuf, 2305tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2245 size_t cnt, loff_t *ppos) 2306 size_t cnt, loff_t *ppos)
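set_tracer_flags() centralizes option toggling so side effects, here swapping both buffers to trace_clock_global for the new "global-clock" option, happen exactly once per transition. From user space the same switch is flipped by writing the option name to trace_options; a hedged sketch, assuming debugfs is mounted at the usual path and the existing "no" prefix disables an option:

/* Userspace sketch: enable the global trace clock via trace_options. */
#include <stdio.h>

int main(void)
{
	/* assumes debugfs is mounted at /sys/kernel/debug */
	FILE *f = fopen("/sys/kernel/debug/tracing/trace_options", "w");

	if (!f)
		return 1;
	fputs("global-clock\n", f);	/* "noglobal-clock" switches back */
	return fclose(f) ? 1 : 0;
}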
@@ -2267,10 +2328,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2267 int len = strlen(trace_options[i]); 2328 int len = strlen(trace_options[i]);
2268 2329
2269 if (strncmp(cmp, trace_options[i], len) == 0) { 2330 if (strncmp(cmp, trace_options[i], len) == 0) {
2270 if (neg) 2331 set_tracer_flags(1 << i, !neg);
2271 trace_flags &= ~(1 << i);
2272 else
2273 trace_flags |= (1 << i);
2274 break; 2332 break;
2275 } 2333 }
2276 } 2334 }
@@ -2494,7 +2552,7 @@ static int tracing_set_tracer(const char *buf)
2494 if (!ring_buffer_expanded) { 2552 if (!ring_buffer_expanded) {
2495 ret = tracing_resize_ring_buffer(trace_buf_size); 2553 ret = tracing_resize_ring_buffer(trace_buf_size);
2496 if (ret < 0) 2554 if (ret < 0)
2497 return ret; 2555 goto out;
2498 ret = 0; 2556 ret = 0;
2499 } 2557 }
2500 2558
@@ -3110,7 +3168,7 @@ static int mark_printk(const char *fmt, ...)
3110 int ret; 3168 int ret;
3111 va_list args; 3169 va_list args;
3112 va_start(args, fmt); 3170 va_start(args, fmt);
3113 ret = trace_vprintk(0, -1, fmt, args); 3171 ret = trace_vprintk(0, fmt, args);
3114 va_end(args); 3172 va_end(args);
3115 return ret; 3173 return ret;
3116} 3174}
@@ -3478,6 +3536,9 @@ struct dentry *tracing_init_dentry(void)
3478 if (d_tracer) 3536 if (d_tracer)
3479 return d_tracer; 3537 return d_tracer;
3480 3538
3539 if (!debugfs_initialized())
3540 return NULL;
3541
3481 d_tracer = debugfs_create_dir("tracing", NULL); 3542 d_tracer = debugfs_create_dir("tracing", NULL);
3482 3543
3483 if (!d_tracer && !once) { 3544 if (!d_tracer && !once) {
@@ -3539,7 +3600,7 @@ static void tracing_init_debugfs_percpu(long cpu)
3539 pr_warning("Could not create debugfs 'trace_pipe' entry\n"); 3600 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3540 3601
3541 /* per cpu trace */ 3602 /* per cpu trace */
3542 entry = debugfs_create_file("trace", 0444, d_cpu, 3603 entry = debugfs_create_file("trace", 0644, d_cpu,
3543 (void *) cpu, &tracing_fops); 3604 (void *) cpu, &tracing_fops);
3544 if (!entry) 3605 if (!entry)
3545 pr_warning("Could not create debugfs 'trace' entry\n"); 3606 pr_warning("Could not create debugfs 'trace' entry\n");
@@ -3853,7 +3914,7 @@ static __init int tracer_init_debugfs(void)
3853 if (!entry) 3914 if (!entry)
3854 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); 3915 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3855 3916
3856 entry = debugfs_create_file("trace", 0444, d_tracer, 3917 entry = debugfs_create_file("trace", 0644, d_tracer,
3857 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); 3918 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
3858 if (!entry) 3919 if (!entry)
3859 pr_warning("Could not create debugfs 'trace' entry\n"); 3920 pr_warning("Could not create debugfs 'trace' entry\n");
@@ -3983,11 +4044,12 @@ trace_printk_seq(struct trace_seq *s)
3983 trace_seq_init(s); 4044 trace_seq_init(s);
3984} 4045}
3985 4046
3986void ftrace_dump(void) 4047static void __ftrace_dump(bool disable_tracing)
3987{ 4048{
3988 static DEFINE_SPINLOCK(ftrace_dump_lock); 4049 static DEFINE_SPINLOCK(ftrace_dump_lock);
3989 /* use static because iter can be a bit big for the stack */ 4050 /* use static because iter can be a bit big for the stack */
3990 static struct trace_iterator iter; 4051 static struct trace_iterator iter;
4052 unsigned int old_userobj;
3991 static int dump_ran; 4053 static int dump_ran;
3992 unsigned long flags; 4054 unsigned long flags;
3993 int cnt = 0, cpu; 4055 int cnt = 0, cpu;
@@ -3999,14 +4061,17 @@ void ftrace_dump(void)
3999 4061
4000 dump_ran = 1; 4062 dump_ran = 1;
4001 4063
4002 /* No turning back! */
4003 tracing_off(); 4064 tracing_off();
4004 ftrace_kill(); 4065
4066 if (disable_tracing)
4067 ftrace_kill();
4005 4068
4006 for_each_tracing_cpu(cpu) { 4069 for_each_tracing_cpu(cpu) {
4007 atomic_inc(&global_trace.data[cpu]->disabled); 4070 atomic_inc(&global_trace.data[cpu]->disabled);
4008 } 4071 }
4009 4072
4073 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
4074
4010 /* don't look at user memory in panic mode */ 4075 /* don't look at user memory in panic mode */
4011 trace_flags &= ~TRACE_ITER_SYM_USEROBJ; 4076 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4012 4077
@@ -4051,10 +4116,26 @@ void ftrace_dump(void)
4051 else 4116 else
4052 printk(KERN_TRACE "---------------------------------\n"); 4117 printk(KERN_TRACE "---------------------------------\n");
4053 4118
4119 /* Re-enable tracing if requested */
4120 if (!disable_tracing) {
4121 trace_flags |= old_userobj;
4122
4123 for_each_tracing_cpu(cpu) {
4124 atomic_dec(&global_trace.data[cpu]->disabled);
4125 }
4126 tracing_on();
4127 }
4128
4054 out: 4129 out:
4055 spin_unlock_irqrestore(&ftrace_dump_lock, flags); 4130 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
4056} 4131}
4057 4132
4133/* By default: disable tracing after the dump */
4134void ftrace_dump(void)
4135{
4136 __ftrace_dump(true);
4137}
4138
4058__init static int tracer_alloc_buffers(void) 4139__init static int tracer_alloc_buffers(void)
4059{ 4140{
4060 struct trace_array_cpu *data; 4141 struct trace_array_cpu *data;
@@ -4125,7 +4206,8 @@ __init static int tracer_alloc_buffers(void)
4125 &trace_panic_notifier); 4206 &trace_panic_notifier);
4126 4207
4127 register_die_notifier(&trace_die_notifier); 4208 register_die_notifier(&trace_die_notifier);
4128 ret = 0; 4209
4210 return 0;
4129 4211
4130out_free_cpumask: 4212out_free_cpumask:
4131 free_cpumask_var(tracing_reader_cpumask); 4213 free_cpumask_var(tracing_reader_cpumask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e7fbc826f1e9..9e15802cca9f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -123,7 +123,6 @@ struct userstack_entry {
123struct bprint_entry { 123struct bprint_entry {
124 struct trace_entry ent; 124 struct trace_entry ent;
125 unsigned long ip; 125 unsigned long ip;
126 int depth;
127 const char *fmt; 126 const char *fmt;
128 u32 buf[]; 127 u32 buf[];
129}; 128};
@@ -131,7 +130,6 @@ struct bprint_entry {
131struct print_entry { 130struct print_entry {
132 struct trace_entry ent; 131 struct trace_entry ent;
133 unsigned long ip; 132 unsigned long ip;
134 int depth;
135 char buf[]; 133 char buf[];
136}; 134};
137 135
@@ -184,6 +182,12 @@ struct trace_power {
184 struct power_trace state_data; 182 struct power_trace state_data;
185}; 183};
186 184
185enum kmemtrace_type_id {
186 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
187 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
188 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
189};
190
187struct kmemtrace_alloc_entry { 191struct kmemtrace_alloc_entry {
188 struct trace_entry ent; 192 struct trace_entry ent;
189 enum kmemtrace_type_id type_id; 193 enum kmemtrace_type_id type_id;
@@ -202,6 +206,19 @@ struct kmemtrace_free_entry {
202 const void *ptr; 206 const void *ptr;
203}; 207};
204 208
209struct syscall_trace_enter {
210 struct trace_entry ent;
211 int nr;
212 unsigned long args[];
213};
214
215struct syscall_trace_exit {
216 struct trace_entry ent;
217 int nr;
218 unsigned long ret;
219};
220
221
205/* 222/*
206 * trace_flag_type is an enumeration that holds different 223 * trace_flag_type is an enumeration that holds different
207 * states when a trace occurs. These are: 224 * states when a trace occurs. These are:
@@ -315,6 +332,10 @@ extern void __ftrace_bad_type(void);
315 TRACE_KMEM_ALLOC); \ 332 TRACE_KMEM_ALLOC); \
316 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 333 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
317 TRACE_KMEM_FREE); \ 334 TRACE_KMEM_FREE); \
335 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
336 TRACE_SYSCALL_ENTER); \
337 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
338 TRACE_SYSCALL_EXIT); \
318 __ftrace_bad_type(); \ 339 __ftrace_bad_type(); \
319 } while (0) 340 } while (0)
320 341
@@ -468,6 +489,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
468 unsigned long flags, int pc); 489 unsigned long flags, int pc);
469void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 490void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
470 unsigned long flags, int pc); 491 unsigned long flags, int pc);
492void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
493 unsigned long flags, int pc);
471 494
472struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 495struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
473 struct trace_array_cpu *data); 496 struct trace_array_cpu *data);
@@ -547,7 +570,7 @@ struct tracer_switch_ops {
547}; 570};
548#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 571#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
549 572
550extern char *trace_find_cmdline(int pid); 573extern void trace_find_cmdline(int pid, char comm[]);
551 574
552#ifdef CONFIG_DYNAMIC_FTRACE 575#ifdef CONFIG_DYNAMIC_FTRACE
553extern unsigned long ftrace_update_tot_cnt; 576extern unsigned long ftrace_update_tot_cnt;
@@ -583,9 +606,9 @@ extern int trace_selftest_startup_hw_branches(struct tracer *trace,
583extern void *head_page(struct trace_array_cpu *data); 606extern void *head_page(struct trace_array_cpu *data);
584extern long ns2usecs(cycle_t nsec); 607extern long ns2usecs(cycle_t nsec);
585extern int 608extern int
586trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args); 609trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
587extern int 610extern int
588trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); 611trace_vprintk(unsigned long ip, const char *fmt, va_list args);
589 612
590extern unsigned long trace_flags; 613extern unsigned long trace_flags;
591 614
@@ -669,6 +692,8 @@ enum trace_iterator_flags {
669 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 692 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
670 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 693 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
671 TRACE_ITER_LATENCY_FMT = 0x40000, 694 TRACE_ITER_LATENCY_FMT = 0x40000,
695 TRACE_ITER_GLOBAL_CLK = 0x80000,
696 TRACE_ITER_SLEEP_TIME = 0x100000,
672}; 697};
673 698
674/* 699/*
@@ -761,22 +786,89 @@ enum {
761 TRACE_EVENT_TYPE_RAW = 2, 786 TRACE_EVENT_TYPE_RAW = 2,
762}; 787};
763 788
789struct ftrace_event_field {
790 struct list_head link;
791 char *name;
792 char *type;
793 int offset;
794 int size;
795};
796
764struct ftrace_event_call { 797struct ftrace_event_call {
765 char *name; 798 char *name;
766 char *system; 799 char *system;
767 struct dentry *dir; 800 struct dentry *dir;
768 int enabled; 801 int enabled;
769 int (*regfunc)(void); 802 int (*regfunc)(void);
770 void (*unregfunc)(void); 803 void (*unregfunc)(void);
771 int id; 804 int id;
772 int (*raw_init)(void); 805 int (*raw_init)(void);
773 int (*show_format)(struct trace_seq *s); 806 int (*show_format)(struct trace_seq *s);
807 int (*define_fields)(void);
808 struct list_head fields;
809 struct filter_pred **preds;
810
811#ifdef CONFIG_EVENT_PROFILE
812 atomic_t profile_count;
813 int (*profile_enable)(struct ftrace_event_call *);
814 void (*profile_disable)(struct ftrace_event_call *);
815#endif
774}; 816};
775 817
818struct event_subsystem {
819 struct list_head list;
820 const char *name;
821 struct dentry *entry;
822 struct filter_pred **preds;
823};
824
825#define events_for_each(event) \
826 for (event = __start_ftrace_events; \
827 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
828 event++)
829
830#define MAX_FILTER_PRED 8
831
832struct filter_pred;
833
834typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
835
836struct filter_pred {
837 filter_pred_fn_t fn;
838 u64 val;
839 char *str_val;
840 int str_len;
841 char *field_name;
842 int offset;
843 int not;
844 int or;
845 int compound;
846 int clear;
847};
848
849int trace_define_field(struct ftrace_event_call *call, char *type,
850 char *name, int offset, int size);
851extern void filter_free_pred(struct filter_pred *pred);
852extern void filter_print_preds(struct filter_pred **preds,
853 struct trace_seq *s);
854extern int filter_parse(char **pbuf, struct filter_pred *pred);
855extern int filter_add_pred(struct ftrace_event_call *call,
856 struct filter_pred *pred);
857extern void filter_free_preds(struct ftrace_event_call *call);
858extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
859extern void filter_free_subsystem_preds(struct event_subsystem *system);
860extern int filter_add_subsystem_pred(struct event_subsystem *system,
861 struct filter_pred *pred);
862
776void event_trace_printk(unsigned long ip, const char *fmt, ...); 863void event_trace_printk(unsigned long ip, const char *fmt, ...);
777extern struct ftrace_event_call __start_ftrace_events[]; 864extern struct ftrace_event_call __start_ftrace_events[];
778extern struct ftrace_event_call __stop_ftrace_events[]; 865extern struct ftrace_event_call __stop_ftrace_events[];
779 866
867#define for_each_event(event) \
868 for (event = __start_ftrace_events; \
869 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
870 event++)
871
780extern const char *__start___trace_bprintk_fmt[]; 872extern const char *__start___trace_bprintk_fmt[];
781extern const char *__stop___trace_bprintk_fmt[]; 873extern const char *__stop___trace_bprintk_fmt[];
782 874
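The filter declarations above describe a small predicate API: filter_parse() fills a struct filter_pred from a textual expression and filter_add_pred() attaches it to an event's preds array. A hedged sketch that loosely mirrors how a written filter expression could be handled; the error handling details are illustrative, not lifted from trace_events_filter.c:

#include <linux/slab.h>
#include "trace.h"

/* Sketch: attach one parsed predicate to an event; details illustrative. */
static int add_one_pred(struct ftrace_event_call *call, char *expr)
{
	struct filter_pred *pred;
	int err;

	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
	if (!pred)
		return -ENOMEM;

	err = filter_parse(&expr, pred);	/* fills field_name/val/str_val */
	if (!err)
		err = filter_add_pred(call, pred);
	if (err)
		filter_free_pred(pred);

	return err;
}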
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 05b176abfd30..b588fd81f7f9 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -18,6 +18,7 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/ktime.h> 20#include <linux/ktime.h>
21#include <linux/trace_clock.h>
21 22
22/* 23/*
23 * trace_clock_local(): the simplest and least coherent tracing clock. 24 * trace_clock_local(): the simplest and least coherent tracing clock.
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
new file mode 100644
index 000000000000..22cba9970776
--- /dev/null
+++ b/kernel/trace/trace_event_profile.c
@@ -0,0 +1,31 @@
1/*
2 * trace event based perf counter profiling
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 */
7
8#include "trace.h"
9
10int ftrace_profile_enable(int event_id)
11{
12 struct ftrace_event_call *event;
13
14 for_each_event(event) {
15 if (event->id == event_id)
16 return event->profile_enable(event);
17 }
18
19 return -EINVAL;
20}
21
22void ftrace_profile_disable(int event_id)
23{
24 struct ftrace_event_call *event;
25
26 for_each_event(event) {
27 if (event->id == event_id)
28 return event->profile_disable(event);
29 }
30}
31
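The two lookups above are keyed purely by the numeric event id that register_ftrace_event() hands out. A hedged sketch of a hypothetical caller follows; the function name and the surrounding error handling are illustrative, not part of this patch.

/* Illustrative only: enable profiling for an event id (for example one
 * read from the per-event debugfs "id" file), then tear it down again. */
static int example_attach_profile(int event_id)
{
	int err;

	err = ftrace_profile_enable(event_id);	/* -EINVAL if the id is unknown */
	if (err)
		return err;

	/* ... consume perf_tpcounter_event() samples while enabled ... */

	ftrace_profile_disable(event_id);
	return 0;
}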
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 019915063fe6..fd78bee71dd7 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -105,7 +105,6 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
105TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, 105TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
106 TRACE_STRUCT( 106 TRACE_STRUCT(
107 TRACE_FIELD(unsigned long, ip, ip) 107 TRACE_FIELD(unsigned long, ip, ip)
108 TRACE_FIELD(unsigned int, depth, depth)
109 TRACE_FIELD(char *, fmt, fmt) 108 TRACE_FIELD(char *, fmt, fmt)
110 TRACE_FIELD_ZERO_CHAR(buf) 109 TRACE_FIELD_ZERO_CHAR(buf)
111 ), 110 ),
@@ -115,7 +114,6 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
115TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, 114TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
116 TRACE_STRUCT( 115 TRACE_STRUCT(
117 TRACE_FIELD(unsigned long, ip, ip) 116 TRACE_FIELD(unsigned long, ip, ip)
118 TRACE_FIELD(unsigned int, depth, depth)
119 TRACE_FIELD_ZERO_CHAR(buf) 117 TRACE_FIELD_ZERO_CHAR(buf)
120 ), 118 ),
121 TP_RAW_FMT("%08lx (%d) fmt:%p %s") 119 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 238ea95a4115..64ec4d278ffb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -19,10 +19,38 @@
19 19
20static DEFINE_MUTEX(event_mutex); 20static DEFINE_MUTEX(event_mutex);
21 21
22#define events_for_each(event) \ 22int trace_define_field(struct ftrace_event_call *call, char *type,
23 for (event = __start_ftrace_events; \ 23 char *name, int offset, int size)
24 (unsigned long)event < (unsigned long)__stop_ftrace_events; \ 24{
25 event++) 25 struct ftrace_event_field *field;
26
27 field = kzalloc(sizeof(*field), GFP_KERNEL);
28 if (!field)
29 goto err;
30
31 field->name = kstrdup(name, GFP_KERNEL);
32 if (!field->name)
33 goto err;
34
35 field->type = kstrdup(type, GFP_KERNEL);
36 if (!field->type)
37 goto err;
38
39 field->offset = offset;
40 field->size = size;
41 list_add(&field->link, &call->fields);
42
43 return 0;
44
45err:
46 if (field) {
47 kfree(field->name);
48 kfree(field->type);
49 }
50 kfree(field);
51
52 return -ENOMEM;
53}
26 54
27static void ftrace_clear_events(void) 55static void ftrace_clear_events(void)
28{ 56{
@@ -90,7 +118,7 @@ static int ftrace_set_clr_event(char *buf, int set)
90 } 118 }
91 119
92 mutex_lock(&event_mutex); 120 mutex_lock(&event_mutex);
93 events_for_each(call) { 121 for_each_event(call) {
94 122
95 if (!call->name || !call->regfunc) 123 if (!call->name || !call->regfunc)
96 continue; 124 continue;
@@ -348,7 +376,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
348 376
349#undef FIELD 377#undef FIELD
350#define FIELD(type, name) \ 378#define FIELD(type, name) \
351 #type, #name, offsetof(typeof(field), name), sizeof(field.name) 379 #type, "common_" #name, offsetof(typeof(field), name), \
380 sizeof(field.name)
352 381
353static int trace_write_header(struct trace_seq *s) 382static int trace_write_header(struct trace_seq *s)
354{ 383{
@@ -378,15 +407,15 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
378 char *buf; 407 char *buf;
379 int r; 408 int r;
380 409
410 if (*ppos)
411 return 0;
412
381 s = kmalloc(sizeof(*s), GFP_KERNEL); 413 s = kmalloc(sizeof(*s), GFP_KERNEL);
382 if (!s) 414 if (!s)
383 return -ENOMEM; 415 return -ENOMEM;
384 416
385 trace_seq_init(s); 417 trace_seq_init(s);
386 418
387 if (*ppos)
388 return 0;
389
390 /* If any of the first writes fail, so will the show_format. */ 419 /* If any of the first writes fail, so will the show_format. */
391 420
392 trace_seq_printf(s, "name: %s\n", call->name); 421 trace_seq_printf(s, "name: %s\n", call->name);
@@ -412,6 +441,162 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
412 return r; 441 return r;
413} 442}
414 443
444static ssize_t
445event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
446{
447 struct ftrace_event_call *call = filp->private_data;
448 struct trace_seq *s;
449 int r;
450
451 if (*ppos)
452 return 0;
453
454 s = kmalloc(sizeof(*s), GFP_KERNEL);
455 if (!s)
456 return -ENOMEM;
457
458 trace_seq_init(s);
459 trace_seq_printf(s, "%d\n", call->id);
460
461 r = simple_read_from_buffer(ubuf, cnt, ppos,
462 s->buffer, s->len);
463 kfree(s);
464 return r;
465}
466
467static ssize_t
468event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
469 loff_t *ppos)
470{
471 struct ftrace_event_call *call = filp->private_data;
472 struct trace_seq *s;
473 int r;
474
475 if (*ppos)
476 return 0;
477
478 s = kmalloc(sizeof(*s), GFP_KERNEL);
479 if (!s)
480 return -ENOMEM;
481
482 trace_seq_init(s);
483
484 filter_print_preds(call->preds, s);
485 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
486
487 kfree(s);
488
489 return r;
490}
491
492static ssize_t
493event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
494 loff_t *ppos)
495{
496 struct ftrace_event_call *call = filp->private_data;
497 char buf[64], *pbuf = buf;
498 struct filter_pred *pred;
499 int err;
500
501 if (cnt >= sizeof(buf))
502 return -EINVAL;
503
504 if (copy_from_user(&buf, ubuf, cnt))
505 return -EFAULT;
506
507 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
508 if (!pred)
509 return -ENOMEM;
510
511 err = filter_parse(&pbuf, pred);
512 if (err < 0) {
513 filter_free_pred(pred);
514 return err;
515 }
516
517 if (pred->clear) {
518 filter_free_preds(call);
519 filter_free_pred(pred);
520 return cnt;
521 }
522
523 if (filter_add_pred(call, pred)) {
524 filter_free_pred(pred);
525 return -EINVAL;
526 }
527
528 *ppos += cnt;
529
530 return cnt;
531}
532
533static ssize_t
534subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
535 loff_t *ppos)
536{
537 struct event_subsystem *system = filp->private_data;
538 struct trace_seq *s;
539 int r;
540
541 if (*ppos)
542 return 0;
543
544 s = kmalloc(sizeof(*s), GFP_KERNEL);
545 if (!s)
546 return -ENOMEM;
547
548 trace_seq_init(s);
549
550 filter_print_preds(system->preds, s);
551 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
552
553 kfree(s);
554
555 return r;
556}
557
558static ssize_t
559subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
560 loff_t *ppos)
561{
562 struct event_subsystem *system = filp->private_data;
563 char buf[64], *pbuf = buf;
564 struct filter_pred *pred;
565 int err;
566
567 if (cnt >= sizeof(buf))
568 return -EINVAL;
569
570 if (copy_from_user(&buf, ubuf, cnt))
571 return -EFAULT;
572
573 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
574 if (!pred)
575 return -ENOMEM;
576
577 err = filter_parse(&pbuf, pred);
578 if (err < 0) {
579 filter_free_pred(pred);
580 return err;
581 }
582
583 if (pred->clear) {
584 filter_free_subsystem_preds(system);
585 filter_free_pred(pred);
586 return cnt;
587 }
588
589 if (filter_add_subsystem_pred(system, pred)) {
590 filter_free_subsystem_preds(system);
591 filter_free_pred(pred);
592 return -EINVAL;
593 }
594
595 *ppos += cnt;
596
597 return cnt;
598}
599
415static const struct seq_operations show_event_seq_ops = { 600static const struct seq_operations show_event_seq_ops = {
416 .start = t_start, 601 .start = t_start,
417 .next = t_next, 602 .next = t_next,
@@ -452,6 +637,23 @@ static const struct file_operations ftrace_event_format_fops = {
452 .read = event_format_read, 637 .read = event_format_read,
453}; 638};
454 639
640static const struct file_operations ftrace_event_id_fops = {
641 .open = tracing_open_generic,
642 .read = event_id_read,
643};
644
645static const struct file_operations ftrace_event_filter_fops = {
646 .open = tracing_open_generic,
647 .read = event_filter_read,
648 .write = event_filter_write,
649};
650
651static const struct file_operations ftrace_subsystem_filter_fops = {
652 .open = tracing_open_generic,
653 .read = subsystem_filter_read,
654 .write = subsystem_filter_write,
655};
656
455static struct dentry *event_trace_events_dir(void) 657static struct dentry *event_trace_events_dir(void)
456{ 658{
457 static struct dentry *d_tracer; 659 static struct dentry *d_tracer;
@@ -472,12 +674,6 @@ static struct dentry *event_trace_events_dir(void)
472 return d_events; 674 return d_events;
473} 675}
474 676
475struct event_subsystem {
476 struct list_head list;
477 const char *name;
478 struct dentry *entry;
479};
480
481static LIST_HEAD(event_subsystems); 677static LIST_HEAD(event_subsystems);
482 678
483static struct dentry * 679static struct dentry *
@@ -510,6 +706,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
510 system->name = name; 706 system->name = name;
511 list_add(&system->list, &event_subsystems); 707 list_add(&system->list, &event_subsystems);
512 708
709 system->preds = NULL;
710
513 return system->entry; 711 return system->entry;
514} 712}
515 713
@@ -550,6 +748,28 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
550 "'%s/enable' entry\n", call->name); 748 "'%s/enable' entry\n", call->name);
551 } 749 }
552 750
751 if (call->id) {
752 entry = debugfs_create_file("id", 0444, call->dir, call,
753 &ftrace_event_id_fops);
754 if (!entry)
755 pr_warning("Could not create debugfs '%s/id' entry\n",
756 call->name);
757 }
758
759 if (call->define_fields) {
760 ret = call->define_fields();
761 if (ret < 0) {
762 pr_warning("Could not initialize trace point"
763 " events/%s\n", call->name);
764 return ret;
765 }
766 entry = debugfs_create_file("filter", 0644, call->dir, call,
767 &ftrace_event_filter_fops);
768 if (!entry)
769 pr_warning("Could not create debugfs "
770 "'%s/filter' entry\n", call->name);
771 }
772
553 /* A trace may not want to export its format */ 773 /* A trace may not want to export its format */
554 if (!call->show_format) 774 if (!call->show_format)
555 return 0; 775 return 0;
@@ -592,7 +812,7 @@ static __init int event_trace_init(void)
592 if (!d_events) 812 if (!d_events)
593 return 0; 813 return 0;
594 814
595 events_for_each(call) { 815 for_each_event(call) {
596 /* The linker may leave blanks */ 816 /* The linker may leave blanks */
597 if (!call->name) 817 if (!call->name)
598 continue; 818 continue;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
new file mode 100644
index 000000000000..026be412f356
--- /dev/null
+++ b/kernel/trace/trace_events_filter.c
@@ -0,0 +1,427 @@
1/*
2 * trace_events_filter - generic event filtering
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */
20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h>
24#include <linux/ctype.h>
25
26#include "trace.h"
27#include "trace_output.h"
28
29static int filter_pred_64(struct filter_pred *pred, void *event)
30{
31 u64 *addr = (u64 *)(event + pred->offset);
32 u64 val = (u64)pred->val;
33 int match;
34
35 match = (val == *addr) ^ pred->not;
36
37 return match;
38}
39
40static int filter_pred_32(struct filter_pred *pred, void *event)
41{
42 u32 *addr = (u32 *)(event + pred->offset);
43 u32 val = (u32)pred->val;
44 int match;
45
46 match = (val == *addr) ^ pred->not;
47
48 return match;
49}
50
51static int filter_pred_16(struct filter_pred *pred, void *event)
52{
53 u16 *addr = (u16 *)(event + pred->offset);
54 u16 val = (u16)pred->val;
55 int match;
56
57 match = (val == *addr) ^ pred->not;
58
59 return match;
60}
61
62static int filter_pred_8(struct filter_pred *pred, void *event)
63{
64 u8 *addr = (u8 *)(event + pred->offset);
65 u8 val = (u8)pred->val;
66 int match;
67
68 match = (val == *addr) ^ pred->not;
69
70 return match;
71}
72
73static int filter_pred_string(struct filter_pred *pred, void *event)
74{
75 char *addr = (char *)(event + pred->offset);
76 int cmp, match;
77
78 cmp = strncmp(addr, pred->str_val, pred->str_len);
79
80 match = (!cmp) ^ pred->not;
81
82 return match;
83}
84
85/* return 1 if event matches, 0 otherwise (discard) */
86int filter_match_preds(struct ftrace_event_call *call, void *rec)
87{
88 int i, matched, and_failed = 0;
89 struct filter_pred *pred;
90
91 for (i = 0; i < MAX_FILTER_PRED; i++) {
92 if (call->preds[i]) {
93 pred = call->preds[i];
94 if (and_failed && !pred->or)
95 continue;
96 matched = pred->fn(pred, rec);
97 if (!matched && !pred->or) {
98 and_failed = 1;
99 continue;
100 } else if (matched && pred->or)
101 return 1;
102 } else
103 break;
104 }
105
106 if (and_failed)
107 return 0;
108
109 return 1;
110}
111
112void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
113{
114 char *field_name;
115 struct filter_pred *pred;
116 int i;
117
118 if (!preds) {
119 trace_seq_printf(s, "none\n");
120 return;
121 }
122
123 for (i = 0; i < MAX_FILTER_PRED; i++) {
124 if (preds[i]) {
125 pred = preds[i];
126 field_name = pred->field_name;
127 if (i)
128 trace_seq_printf(s, pred->or ? "|| " : "&& ");
129 trace_seq_printf(s, "%s ", field_name);
130 trace_seq_printf(s, pred->not ? "!= " : "== ");
131 if (pred->str_val)
132 trace_seq_printf(s, "%s\n", pred->str_val);
133 else
134 trace_seq_printf(s, "%llu\n", pred->val);
135 } else
136 break;
137 }
138}
139
140static struct ftrace_event_field *
141find_event_field(struct ftrace_event_call *call, char *name)
142{
143 struct ftrace_event_field *field;
144
145 list_for_each_entry(field, &call->fields, link) {
146 if (!strcmp(field->name, name))
147 return field;
148 }
149
150 return NULL;
151}
152
153void filter_free_pred(struct filter_pred *pred)
154{
155 if (!pred)
156 return;
157
158 kfree(pred->field_name);
159 kfree(pred->str_val);
160 kfree(pred);
161}
162
163void filter_free_preds(struct ftrace_event_call *call)
164{
165 int i;
166
167 if (call->preds) {
168 for (i = 0; i < MAX_FILTER_PRED; i++)
169 filter_free_pred(call->preds[i]);
170 kfree(call->preds);
171 call->preds = NULL;
172 }
173}
174
175void filter_free_subsystem_preds(struct event_subsystem *system)
176{
177 struct ftrace_event_call *call = __start_ftrace_events;
178 int i;
179
180 if (system->preds) {
181 for (i = 0; i < MAX_FILTER_PRED; i++)
182 filter_free_pred(system->preds[i]);
183 kfree(system->preds);
184 system->preds = NULL;
185 }
186
187 events_for_each(call) {
188 if (!call->name || !call->regfunc)
189 continue;
190
191 if (!strcmp(call->system, system->name))
192 filter_free_preds(call);
193 }
194}
195
196static int __filter_add_pred(struct ftrace_event_call *call,
197 struct filter_pred *pred)
198{
199 int i;
200
201 if (call->preds && !pred->compound)
202 filter_free_preds(call);
203
204 if (!call->preds) {
205 call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
206 GFP_KERNEL);
207 if (!call->preds)
208 return -ENOMEM;
209 }
210
211 for (i = 0; i < MAX_FILTER_PRED; i++) {
212 if (!call->preds[i]) {
213 call->preds[i] = pred;
214 return 0;
215 }
216 }
217
218 return -ENOMEM;
219}
220
221static int is_string_field(const char *type)
222{
223 if (strchr(type, '[') && strstr(type, "char"))
224 return 1;
225
226 return 0;
227}
228
229int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
230{
231 struct ftrace_event_field *field;
232
233 field = find_event_field(call, pred->field_name);
234 if (!field)
235 return -EINVAL;
236
237 pred->offset = field->offset;
238
239 if (is_string_field(field->type)) {
240 if (!pred->str_val)
241 return -EINVAL;
242 pred->fn = filter_pred_string;
243 pred->str_len = field->size;
244 return __filter_add_pred(call, pred);
245 } else {
246 if (pred->str_val)
247 return -EINVAL;
248 }
249
250 switch (field->size) {
251 case 8:
252 pred->fn = filter_pred_64;
253 break;
254 case 4:
255 pred->fn = filter_pred_32;
256 break;
257 case 2:
258 pred->fn = filter_pred_16;
259 break;
260 case 1:
261 pred->fn = filter_pred_8;
262 break;
263 default:
264 return -EINVAL;
265 }
266
267 return __filter_add_pred(call, pred);
268}
269
270static struct filter_pred *copy_pred(struct filter_pred *pred)
271{
272 struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
273 if (!new_pred)
274 return NULL;
275
276 memcpy(new_pred, pred, sizeof(*pred));
277
278 if (pred->field_name) {
279 new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
280 if (!new_pred->field_name) {
281 kfree(new_pred);
282 return NULL;
283 }
284 }
285
286 if (pred->str_val) {
287 new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
288 if (!new_pred->str_val) {
289 filter_free_pred(new_pred);
290 return NULL;
291 }
292 }
293
294 return new_pred;
295}
296
297int filter_add_subsystem_pred(struct event_subsystem *system,
298 struct filter_pred *pred)
299{
300 struct ftrace_event_call *call = __start_ftrace_events;
301 struct filter_pred *event_pred;
302 int i;
303
304 if (system->preds && !pred->compound)
305 filter_free_subsystem_preds(system);
306
307 if (!system->preds) {
308 system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
309 GFP_KERNEL);
310 if (!system->preds)
311 return -ENOMEM;
312 }
313
314 for (i = 0; i < MAX_FILTER_PRED; i++) {
315 if (!system->preds[i]) {
316 system->preds[i] = pred;
317 break;
318 }
319 }
320
321 if (i == MAX_FILTER_PRED)
322 return -EINVAL;
323
324 events_for_each(call) {
325 int err;
326
327 if (!call->name || !call->regfunc)
328 continue;
329
330 if (strcmp(call->system, system->name))
331 continue;
332
333 if (!find_event_field(call, pred->field_name))
334 continue;
335
336 event_pred = copy_pred(pred);
337 if (!event_pred)
338 goto oom;
339
340 err = filter_add_pred(call, event_pred);
341 if (err)
342 filter_free_pred(event_pred);
343 if (err == -ENOMEM)
344 goto oom;
345 }
346
347 return 0;
348
349oom:
350 system->preds[i] = NULL;
351 return -ENOMEM;
352}
353
354int filter_parse(char **pbuf, struct filter_pred *pred)
355{
356 char *tmp, *tok, *val_str = NULL;
357 int tok_n = 0;
358
359 /* field ==/!= number, or/and field ==/!= number, number */
360 while ((tok = strsep(pbuf, " \n"))) {
361 if (tok_n == 0) {
362 if (!strcmp(tok, "0")) {
363 pred->clear = 1;
364 return 0;
365 } else if (!strcmp(tok, "&&")) {
366 pred->or = 0;
367 pred->compound = 1;
368 } else if (!strcmp(tok, "||")) {
369 pred->or = 1;
370 pred->compound = 1;
371 } else
372 pred->field_name = tok;
373 tok_n = 1;
374 continue;
375 }
376 if (tok_n == 1) {
377 if (!pred->field_name)
378 pred->field_name = tok;
379 else if (!strcmp(tok, "!="))
380 pred->not = 1;
381 else if (!strcmp(tok, "=="))
382 pred->not = 0;
383 else {
384 pred->field_name = NULL;
385 return -EINVAL;
386 }
387 tok_n = 2;
388 continue;
389 }
390 if (tok_n == 2) {
391 if (pred->compound) {
392 if (!strcmp(tok, "!="))
393 pred->not = 1;
394 else if (!strcmp(tok, "=="))
395 pred->not = 0;
396 else {
397 pred->field_name = NULL;
398 return -EINVAL;
399 }
400 } else {
401 val_str = tok;
402 break; /* done */
403 }
404 tok_n = 3;
405 continue;
406 }
407 if (tok_n == 3) {
408 val_str = tok;
409 break; /* done */
410 }
411 }
412
413 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
414 if (!pred->field_name)
415 return -ENOMEM;
416
417 pred->val = simple_strtoull(val_str, &tmp, 10);
418 if (tmp == val_str) {
419 pred->str_val = kstrdup(val_str, GFP_KERNEL);
420 if (!pred->str_val)
421 return -ENOMEM;
422 }
423
424 return 0;
425}
426
427
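filter_parse() above accepts strings of the form "<field> ==|!= <value>", an "&&" or "||" prefix marking a compound (follow-up) predicate, or a lone "0" to clear all predicates. A hedged sketch of the end-to-end flow, mirroring event_filter_write() in trace_events.c; the helper name is illustrative only.

/* Illustrative only: how a filter string such as "prev_pid == 1" or a
 * compound "&& next_prio != 120" flows through the new filter API. */
static int example_set_filter(struct ftrace_event_call *call, char *buf)
{
	struct filter_pred *pred;
	int err;

	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
	if (!pred)
		return -ENOMEM;

	err = filter_parse(&buf, pred);		/* fills field_name, ==/!=, value */
	if (err < 0) {
		filter_free_pred(pred);
		return err;
	}

	if (pred->clear) {			/* a lone "0" clears every predicate */
		filter_free_preds(call);
		filter_free_pred(pred);
		return 0;
	}

	/* picks filter_pred_8/16/32/64 or filter_pred_string by field size/type */
	return filter_add_pred(call, pred);
}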
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 5117c43f5c67..30743f7d4110 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \
129} 129}
130 130
131#include <trace/trace_event_types.h> 131#include <trace/trace_event_types.h>
132
133#undef __field
134#define __field(type, item) \
135 ret = trace_define_field(event_call, #type, #item, \
136 offsetof(typeof(field), item), \
137 sizeof(field.item)); \
138 if (ret) \
139 return ret;
140
141#undef __array
142#define __array(type, item, len) \
143 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
144 offsetof(typeof(field), item), \
145 sizeof(field.item)); \
146 if (ret) \
147 return ret;
148
149#define __common_field(type, item) \
150 ret = trace_define_field(event_call, #type, "common_" #item, \
151 offsetof(typeof(field.ent), item), \
152 sizeof(field.ent.item)); \
153 if (ret) \
154 return ret;
155
156#undef TRACE_EVENT
157#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
158int \
159ftrace_define_fields_##call(void) \
160{ \
161 struct ftrace_raw_##call field; \
162 struct ftrace_event_call *event_call = &event_##call; \
163 int ret; \
164 \
165 __common_field(unsigned char, type); \
166 __common_field(unsigned char, flags); \
167 __common_field(unsigned char, preempt_count); \
168 __common_field(int, pid); \
169 __common_field(int, tgid); \
170 \
171 tstruct; \
172 \
173 return ret; \
174}
175
176#include <trace/trace_event_types.h>
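For a concrete feel of what the __field() and __common_field() macros above expand to, here is a hand-written equivalent for a made-up event "foo_bar" with a single pid field; the event name, its raw structure and the field are invented for illustration.

/* Illustrative expansion only: roughly what ftrace_define_fields_<call>()
 * looks like after preprocessing, for a hypothetical event "foo_bar". */
int ftrace_define_fields_foo_bar(void)
{
	struct ftrace_raw_foo_bar field;
	struct ftrace_event_call *event_call = &event_foo_bar;
	int ret;

	/* one of the common entry-header fields, prefixed "common_" */
	ret = trace_define_field(event_call, "unsigned char", "common_type",
				 offsetof(typeof(field.ent), type),
				 sizeof(field.ent.type));
	if (ret)
		return ret;

	/* one event-specific field, from __field(pid_t, pid) */
	ret = trace_define_field(event_call, "pid_t", "pid",
				 offsetof(typeof(field), pid),
				 sizeof(field.pid));
	if (ret)
		return ret;

	return ret;
}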
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index ae2e323df0c7..9d2fa78cecca 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -109,6 +109,40 @@
109#undef TP_FMT 109#undef TP_FMT
110#define TP_FMT(fmt, args...) fmt "\n", ##args 110#define TP_FMT(fmt, args...) fmt "\n", ##args
111 111
112#ifdef CONFIG_EVENT_PROFILE
113#define _TRACE_PROFILE(call, proto, args) \
114static void ftrace_profile_##call(proto) \
115{ \
116 extern void perf_tpcounter_event(int); \
117 perf_tpcounter_event(event_##call.id); \
118} \
119 \
120static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
121{ \
122 int ret = 0; \
123 \
124 if (!atomic_inc_return(&call->profile_count)) \
125 ret = register_trace_##call(ftrace_profile_##call); \
126 \
127 return ret; \
128} \
129 \
130static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
131{ \
132 if (atomic_add_negative(-1, &call->profile_count)) \
133 unregister_trace_##call(ftrace_profile_##call); \
134}
135
136#define _TRACE_PROFILE_INIT(call) \
137 .profile_count = ATOMIC_INIT(-1), \
138 .profile_enable = ftrace_profile_enable_##call, \
139 .profile_disable = ftrace_profile_disable_##call,
140
141#else
142#define _TRACE_PROFILE(call, proto, args)
143#define _TRACE_PROFILE_INIT(call)
144#endif
145
112#define _TRACE_FORMAT(call, proto, args, fmt) \ 146#define _TRACE_FORMAT(call, proto, args, fmt) \
113static void ftrace_event_##call(proto) \ 147static void ftrace_event_##call(proto) \
114{ \ 148{ \
@@ -130,18 +164,33 @@ static void ftrace_unreg_event_##call(void) \
130{ \ 164{ \
131 unregister_trace_##call(ftrace_event_##call); \ 165 unregister_trace_##call(ftrace_event_##call); \
132} \ 166} \
133 167 \
168static struct ftrace_event_call event_##call; \
169 \
170static int ftrace_init_event_##call(void) \
171{ \
172 int id; \
173 \
174 id = register_ftrace_event(NULL); \
175 if (!id) \
176 return -ENODEV; \
177 event_##call.id = id; \
178 return 0; \
179}
134 180
135#undef TRACE_FORMAT 181#undef TRACE_FORMAT
136#define TRACE_FORMAT(call, proto, args, fmt) \ 182#define TRACE_FORMAT(call, proto, args, fmt) \
137_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ 183_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
184_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
138static struct ftrace_event_call __used \ 185static struct ftrace_event_call __used \
139__attribute__((__aligned__(4))) \ 186__attribute__((__aligned__(4))) \
140__attribute__((section("_ftrace_events"))) event_##call = { \ 187__attribute__((section("_ftrace_events"))) event_##call = { \
141 .name = #call, \ 188 .name = #call, \
142 .system = __stringify(TRACE_SYSTEM), \ 189 .system = __stringify(TRACE_SYSTEM), \
190 .raw_init = ftrace_init_event_##call, \
143 .regfunc = ftrace_reg_event_##call, \ 191 .regfunc = ftrace_reg_event_##call, \
144 .unregfunc = ftrace_unreg_event_##call, \ 192 .unregfunc = ftrace_unreg_event_##call, \
193 _TRACE_PROFILE_INIT(call) \
145} 194}
146 195
147#undef __entry 196#undef __entry
@@ -149,11 +198,13 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
149 198
150#undef TRACE_EVENT 199#undef TRACE_EVENT
151#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 200#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
201_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
152 \ 202 \
153static struct ftrace_event_call event_##call; \ 203static struct ftrace_event_call event_##call; \
154 \ 204 \
155static void ftrace_raw_event_##call(proto) \ 205static void ftrace_raw_event_##call(proto) \
156{ \ 206{ \
207 struct ftrace_event_call *call = &event_##call; \
157 struct ring_buffer_event *event; \ 208 struct ring_buffer_event *event; \
158 struct ftrace_raw_##call *entry; \ 209 struct ftrace_raw_##call *entry; \
159 unsigned long irq_flags; \ 210 unsigned long irq_flags; \
@@ -171,7 +222,11 @@ static void ftrace_raw_event_##call(proto) \
171 \ 222 \
172 assign; \ 223 assign; \
173 \ 224 \
174 trace_current_buffer_unlock_commit(event, irq_flags, pc); \ 225 if (call->preds && !filter_match_preds(call, entry)) \
226 ring_buffer_event_discard(event); \
227 \
228 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
229 \
175} \ 230} \
176 \ 231 \
177static int ftrace_raw_reg_event_##call(void) \ 232static int ftrace_raw_reg_event_##call(void) \
@@ -202,6 +257,7 @@ static int ftrace_raw_init_event_##call(void) \
202 if (!id) \ 257 if (!id) \
203 return -ENODEV; \ 258 return -ENODEV; \
204 event_##call.id = id; \ 259 event_##call.id = id; \
260 INIT_LIST_HEAD(&event_##call.fields); \
205 return 0; \ 261 return 0; \
206} \ 262} \
207 \ 263 \
@@ -214,4 +270,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
214 .regfunc = ftrace_raw_reg_event_##call, \ 270 .regfunc = ftrace_raw_reg_event_##call, \
215 .unregfunc = ftrace_raw_unreg_event_##call, \ 271 .unregfunc = ftrace_raw_unreg_event_##call, \
216 .show_format = ftrace_format_##call, \ 272 .show_format = ftrace_format_##call, \
273 .define_fields = ftrace_define_fields_##call, \
274 _TRACE_PROFILE_INIT(call) \
217} 275}
276
277#include <trace/trace_event_types.h>
278
279#undef _TRACE_PROFILE
280#undef _TRACE_PROFILE_INIT
281
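Under CONFIG_EVENT_PROFILE the _TRACE_PROFILE() macro above generates, per tracepoint, a probe that feeds perf plus refcounted enable/disable hooks. A hedged, hand-expanded sketch for a made-up tracepoint "foo_bar" with an int argument; all names are invented for illustration.

/* Illustrative expansion only: what _TRACE_PROFILE(foo_bar, ...) emits. */
static void ftrace_profile_foo_bar(int arg)	/* proto is the tracepoint's */
{
	extern void perf_tpcounter_event(int);
	perf_tpcounter_event(event_foo_bar.id);
}

static int ftrace_profile_enable_foo_bar(struct ftrace_event_call *call)
{
	int ret = 0;

	/* profile_count starts at -1, so the first enabler registers the probe */
	if (!atomic_inc_return(&call->profile_count))
		ret = register_trace_foo_bar(ftrace_profile_foo_bar);

	return ret;
}

static void ftrace_profile_disable_foo_bar(struct ftrace_event_call *call)
{
	/* the last disabler drops the count below zero and unregisters */
	if (atomic_add_negative(-1, &call->profile_count))
		unregister_trace_foo_bar(ftrace_profile_foo_bar);
}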
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4c388607ed67..d28687e7b3a7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,6 +14,11 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data {
18 pid_t last_pid;
19 int depth;
20};
21
17#define TRACE_GRAPH_INDENT 2 22#define TRACE_GRAPH_INDENT 2
18 23
19/* Flag options */ 24/* Flag options */
@@ -52,9 +57,9 @@ static struct tracer_flags tracer_flags = {
52 57
53/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
54int 59int
55ftrace_push_return_trace(unsigned long ret, unsigned long long time, 60ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
56 unsigned long func, int *depth)
57{ 61{
62 unsigned long long calltime;
58 int index; 63 int index;
59 64
60 if (!current->ret_stack) 65 if (!current->ret_stack)
@@ -66,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time,
66 return -EBUSY; 71 return -EBUSY;
67 } 72 }
68 73
74 calltime = trace_clock_local();
75
69 index = ++current->curr_ret_stack; 76 index = ++current->curr_ret_stack;
70 barrier(); 77 barrier();
71 current->ret_stack[index].ret = ret; 78 current->ret_stack[index].ret = ret;
72 current->ret_stack[index].func = func; 79 current->ret_stack[index].func = func;
73 current->ret_stack[index].calltime = time; 80 current->ret_stack[index].calltime = calltime;
74 *depth = index; 81 *depth = index;
75 82
76 return 0; 83 return 0;
@@ -190,15 +197,15 @@ print_graph_cpu(struct trace_seq *s, int cpu)
190static enum print_line_t 197static enum print_line_t
191print_graph_proc(struct trace_seq *s, pid_t pid) 198print_graph_proc(struct trace_seq *s, pid_t pid)
192{ 199{
193 int i; 200 char comm[TASK_COMM_LEN];
194 int ret;
195 int len;
196 char comm[8];
197 int spaces = 0;
198 /* sign + log10(MAX_INT) + '\0' */ 201 /* sign + log10(MAX_INT) + '\0' */
199 char pid_str[11]; 202 char pid_str[11];
203 int spaces = 0;
204 int ret;
205 int len;
206 int i;
200 207
201 strncpy(comm, trace_find_cmdline(pid), 7); 208 trace_find_cmdline(pid, comm);
202 comm[7] = '\0'; 209 comm[7] = '\0';
203 sprintf(pid_str, "%d", pid); 210 sprintf(pid_str, "%d", pid);
204 211
@@ -231,16 +238,16 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
231 238
232/* If the pid changed since the last trace, output this event */ 239/* If the pid changed since the last trace, output this event */
233static enum print_line_t 240static enum print_line_t
234verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu) 241verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
235{ 242{
236 pid_t prev_pid; 243 pid_t prev_pid;
237 pid_t *last_pid; 244 pid_t *last_pid;
238 int ret; 245 int ret;
239 246
240 if (!last_pids_cpu) 247 if (!data)
241 return TRACE_TYPE_HANDLED; 248 return TRACE_TYPE_HANDLED;
242 249
243 last_pid = per_cpu_ptr(last_pids_cpu, cpu); 250 last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
244 251
245 if (*last_pid == pid) 252 if (*last_pid == pid)
246 return TRACE_TYPE_HANDLED; 253 return TRACE_TYPE_HANDLED;
@@ -471,6 +478,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
471 struct ftrace_graph_ent_entry *entry, 478 struct ftrace_graph_ent_entry *entry,
472 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) 479 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
473{ 480{
481 struct fgraph_data *data = iter->private;
474 struct ftrace_graph_ret *graph_ret; 482 struct ftrace_graph_ret *graph_ret;
475 struct ftrace_graph_ent *call; 483 struct ftrace_graph_ent *call;
476 unsigned long long duration; 484 unsigned long long duration;
@@ -481,6 +489,18 @@ print_graph_entry_leaf(struct trace_iterator *iter,
481 call = &entry->graph_ent; 489 call = &entry->graph_ent;
482 duration = graph_ret->rettime - graph_ret->calltime; 490 duration = graph_ret->rettime - graph_ret->calltime;
483 491
492 if (data) {
493 int cpu = iter->cpu;
494 int *depth = &(per_cpu_ptr(data, cpu)->depth);
495
496 /*
497 * Comments display at + 1 to depth. Since
498 * this is a leaf function, keep the comments
499 * equal to this depth.
500 */
501 *depth = call->depth - 1;
502 }
503
484 /* Overhead */ 504 /* Overhead */
485 ret = print_graph_overhead(duration, s); 505 ret = print_graph_overhead(duration, s);
486 if (!ret) 506 if (!ret)
@@ -512,12 +532,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
512} 532}
513 533
514static enum print_line_t 534static enum print_line_t
515print_graph_entry_nested(struct ftrace_graph_ent_entry *entry, 535print_graph_entry_nested(struct trace_iterator *iter,
516 struct trace_seq *s, pid_t pid, int cpu) 536 struct ftrace_graph_ent_entry *entry,
537 struct trace_seq *s, int cpu)
517{ 538{
518 int i;
519 int ret;
520 struct ftrace_graph_ent *call = &entry->graph_ent; 539 struct ftrace_graph_ent *call = &entry->graph_ent;
540 struct fgraph_data *data = iter->private;
541 int ret;
542 int i;
543
544 if (data) {
545 int cpu = iter->cpu;
546 int *depth = &(per_cpu_ptr(data, cpu)->depth);
547
548 *depth = call->depth;
549 }
521 550
522 /* No overhead */ 551 /* No overhead */
523 ret = print_graph_overhead(-1, s); 552 ret = print_graph_overhead(-1, s);
@@ -554,24 +583,24 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
554} 583}
555 584
556static enum print_line_t 585static enum print_line_t
557print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 586print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
558 struct trace_iterator *iter) 587 int type, unsigned long addr)
559{ 588{
560 int ret; 589 struct fgraph_data *data = iter->private;
561 int cpu = iter->cpu;
562 pid_t *last_entry = iter->private;
563 struct trace_entry *ent = iter->ent; 590 struct trace_entry *ent = iter->ent;
564 struct ftrace_graph_ent *call = &field->graph_ent; 591 int cpu = iter->cpu;
565 struct ftrace_graph_ret_entry *leaf_ret; 592 int ret;
566 593
567 /* Pid */ 594 /* Pid */
568 if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE) 595 if (verif_pid(s, ent->pid, cpu, data) == TRACE_TYPE_PARTIAL_LINE)
569 return TRACE_TYPE_PARTIAL_LINE; 596 return TRACE_TYPE_PARTIAL_LINE;
570 597
571 /* Interrupt */ 598 if (type) {
572 ret = print_graph_irq(iter, call->func, TRACE_GRAPH_ENT, cpu, ent->pid); 599 /* Interrupt */
573 if (ret == TRACE_TYPE_PARTIAL_LINE) 600 ret = print_graph_irq(iter, addr, type, cpu, ent->pid);
574 return TRACE_TYPE_PARTIAL_LINE; 601 if (ret == TRACE_TYPE_PARTIAL_LINE)
602 return TRACE_TYPE_PARTIAL_LINE;
603 }
575 604
576 /* Absolute time */ 605 /* Absolute time */
577 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 606 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
@@ -598,11 +627,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
598 return TRACE_TYPE_PARTIAL_LINE; 627 return TRACE_TYPE_PARTIAL_LINE;
599 } 628 }
600 629
630 return 0;
631}
632
633static enum print_line_t
634print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
635 struct trace_iterator *iter)
636{
637 int cpu = iter->cpu;
638 struct ftrace_graph_ent *call = &field->graph_ent;
639 struct ftrace_graph_ret_entry *leaf_ret;
640
641 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
642 return TRACE_TYPE_PARTIAL_LINE;
643
601 leaf_ret = get_return_for_leaf(iter, field); 644 leaf_ret = get_return_for_leaf(iter, field);
602 if (leaf_ret) 645 if (leaf_ret)
603 return print_graph_entry_leaf(iter, field, leaf_ret, s); 646 return print_graph_entry_leaf(iter, field, leaf_ret, s);
604 else 647 else
605 return print_graph_entry_nested(field, s, iter->ent->pid, cpu); 648 return print_graph_entry_nested(iter, field, s, cpu);
606 649
607} 650}
608 651
@@ -610,40 +653,27 @@ static enum print_line_t
610print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 653print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
611 struct trace_entry *ent, struct trace_iterator *iter) 654 struct trace_entry *ent, struct trace_iterator *iter)
612{ 655{
613 int i;
614 int ret;
615 int cpu = iter->cpu;
616 pid_t *last_pid = iter->private, pid = ent->pid;
617 unsigned long long duration = trace->rettime - trace->calltime; 656 unsigned long long duration = trace->rettime - trace->calltime;
657 struct fgraph_data *data = iter->private;
658 pid_t pid = ent->pid;
659 int cpu = iter->cpu;
660 int ret;
661 int i;
618 662
619 /* Pid */ 663 if (data) {
620 if (verif_pid(s, pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE) 664 int cpu = iter->cpu;
621 return TRACE_TYPE_PARTIAL_LINE; 665 int *depth = &(per_cpu_ptr(data, cpu)->depth);
622 666
623 /* Absolute time */ 667 /*
624 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 668 * Comments display at + 1 to depth. This is the
625 ret = print_graph_abs_time(iter->ts, s); 669 * return from a function, we now want the comments
626 if (!ret) 670 * to display at the same level of the bracket.
627 return TRACE_TYPE_PARTIAL_LINE; 671 */
672 *depth = trace->depth - 1;
628 } 673 }
629 674
630 /* Cpu */ 675 if (print_graph_prologue(iter, s, 0, 0))
631 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 676 return TRACE_TYPE_PARTIAL_LINE;
632 ret = print_graph_cpu(s, cpu);
633 if (ret == TRACE_TYPE_PARTIAL_LINE)
634 return TRACE_TYPE_PARTIAL_LINE;
635 }
636
637 /* Proc */
638 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
639 ret = print_graph_proc(s, ent->pid);
640 if (ret == TRACE_TYPE_PARTIAL_LINE)
641 return TRACE_TYPE_PARTIAL_LINE;
642
643 ret = trace_seq_printf(s, " | ");
644 if (!ret)
645 return TRACE_TYPE_PARTIAL_LINE;
646 }
647 677
648 /* Overhead */ 678 /* Overhead */
649 ret = print_graph_overhead(duration, s); 679 ret = print_graph_overhead(duration, s);
@@ -684,42 +714,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
684} 714}
685 715
686static enum print_line_t 716static enum print_line_t
687print_graph_comment(struct bprint_entry *trace, struct trace_seq *s, 717print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
688 struct trace_entry *ent, struct trace_iterator *iter) 718 struct trace_iterator *iter)
689{ 719{
690 int i; 720 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
721 struct fgraph_data *data = iter->private;
722 struct trace_event *event;
723 int depth = 0;
691 int ret; 724 int ret;
692 int cpu = iter->cpu; 725 int i;
693 pid_t *last_pid = iter->private;
694
695 /* Pid */
696 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
697 return TRACE_TYPE_PARTIAL_LINE;
698
699 /* Absolute time */
700 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
701 ret = print_graph_abs_time(iter->ts, s);
702 if (!ret)
703 return TRACE_TYPE_PARTIAL_LINE;
704 }
705 726
706 /* Cpu */ 727 if (data)
707 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 728 depth = per_cpu_ptr(data, iter->cpu)->depth;
708 ret = print_graph_cpu(s, cpu);
709 if (ret == TRACE_TYPE_PARTIAL_LINE)
710 return TRACE_TYPE_PARTIAL_LINE;
711 }
712 729
713 /* Proc */ 730 if (print_graph_prologue(iter, s, 0, 0))
714 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 731 return TRACE_TYPE_PARTIAL_LINE;
715 ret = print_graph_proc(s, ent->pid);
716 if (ret == TRACE_TYPE_PARTIAL_LINE)
717 return TRACE_TYPE_PARTIAL_LINE;
718
719 ret = trace_seq_printf(s, " | ");
720 if (!ret)
721 return TRACE_TYPE_PARTIAL_LINE;
722 }
723 732
724 /* No overhead */ 733 /* No overhead */
725 ret = print_graph_overhead(-1, s); 734 ret = print_graph_overhead(-1, s);
@@ -734,8 +743,8 @@ print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
734 } 743 }
735 744
736 /* Indentation */ 745 /* Indentation */
737 if (trace->depth > 0) 746 if (depth > 0)
738 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { 747 for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
739 ret = trace_seq_printf(s, " "); 748 ret = trace_seq_printf(s, " ");
740 if (!ret) 749 if (!ret)
741 return TRACE_TYPE_PARTIAL_LINE; 750 return TRACE_TYPE_PARTIAL_LINE;
@@ -746,9 +755,26 @@ print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
746 if (!ret) 755 if (!ret)
747 return TRACE_TYPE_PARTIAL_LINE; 756 return TRACE_TYPE_PARTIAL_LINE;
748 757
749 ret = trace_seq_bprintf(s, trace->fmt, trace->buf); 758 switch (iter->ent->type) {
750 if (!ret) 759 case TRACE_BPRINT:
751 return TRACE_TYPE_PARTIAL_LINE; 760 ret = trace_print_bprintk_msg_only(iter);
761 if (ret != TRACE_TYPE_HANDLED)
762 return ret;
763 break;
764 case TRACE_PRINT:
765 ret = trace_print_printk_msg_only(iter);
766 if (ret != TRACE_TYPE_HANDLED)
767 return ret;
768 break;
769 default:
770 event = ftrace_find_event(ent->type);
771 if (!event)
772 return TRACE_TYPE_UNHANDLED;
773
774 ret = event->trace(iter, sym_flags);
775 if (ret != TRACE_TYPE_HANDLED)
776 return ret;
777 }
752 778
753 /* Strip ending newline */ 779 /* Strip ending newline */
754 if (s->buffer[s->len - 1] == '\n') { 780 if (s->buffer[s->len - 1] == '\n') {
@@ -767,8 +793,8 @@ print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
767enum print_line_t 793enum print_line_t
768print_graph_function(struct trace_iterator *iter) 794print_graph_function(struct trace_iterator *iter)
769{ 795{
770 struct trace_seq *s = &iter->seq;
771 struct trace_entry *entry = iter->ent; 796 struct trace_entry *entry = iter->ent;
797 struct trace_seq *s = &iter->seq;
772 798
773 switch (entry->type) { 799 switch (entry->type) {
774 case TRACE_GRAPH_ENT: { 800 case TRACE_GRAPH_ENT: {
@@ -781,14 +807,11 @@ print_graph_function(struct trace_iterator *iter)
781 trace_assign_type(field, entry); 807 trace_assign_type(field, entry);
782 return print_graph_return(&field->ret, s, entry, iter); 808 return print_graph_return(&field->ret, s, entry, iter);
783 } 809 }
784 case TRACE_BPRINT: {
785 struct bprint_entry *field;
786 trace_assign_type(field, entry);
787 return print_graph_comment(field, s, entry, iter);
788 }
789 default: 810 default:
790 return TRACE_TYPE_UNHANDLED; 811 return print_graph_comment(s, entry, iter);
791 } 812 }
813
814 return TRACE_TYPE_HANDLED;
792} 815}
793 816
794static void print_graph_headers(struct seq_file *s) 817static void print_graph_headers(struct seq_file *s)
@@ -820,19 +843,21 @@ static void print_graph_headers(struct seq_file *s)
820 843
821static void graph_trace_open(struct trace_iterator *iter) 844static void graph_trace_open(struct trace_iterator *iter)
822{ 845{
823 /* pid on the last trace processed */ 846 /* pid and depth on the last trace processed */
824 pid_t *last_pid = alloc_percpu(pid_t); 847 struct fgraph_data *data = alloc_percpu(struct fgraph_data);
825 int cpu; 848 int cpu;
826 849
827 if (!last_pid) 850 if (!data)
828 pr_warning("function graph tracer: not enough memory\n"); 851 pr_warning("function graph tracer: not enough memory\n");
829 else 852 else
830 for_each_possible_cpu(cpu) { 853 for_each_possible_cpu(cpu) {
831 pid_t *pid = per_cpu_ptr(last_pid, cpu); 854 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
855 int *depth = &(per_cpu_ptr(data, cpu)->depth);
832 *pid = -1; 856 *pid = -1;
857 *depth = 0;
833 } 858 }
834 859
835 iter->private = last_pid; 860 iter->private = data;
836} 861}
837 862
838static void graph_trace_close(struct trace_iterator *iter) 863static void graph_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f095916e477f..8e37fcddd8b4 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -359,5 +359,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
359 359
360int mmio_trace_printk(const char *fmt, va_list args) 360int mmio_trace_printk(const char *fmt, va_list args)
361{ 361{
362 return trace_vprintk(0, -1, fmt, args); 362 return trace_vprintk(0, fmt, args);
363} 363}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 9aa84bde23cd..394f94417e2f 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -91,6 +91,7 @@ struct tracer nop_trace __read_mostly =
91 .name = "nop", 91 .name = "nop",
92 .init = nop_trace_init, 92 .init = nop_trace_init,
93 .reset = nop_trace_reset, 93 .reset = nop_trace_reset,
94 .wait_pipe = poll_wait_pipe,
94#ifdef CONFIG_FTRACE_SELFTEST 95#ifdef CONFIG_FTRACE_SELFTEST
95 .selftest = trace_selftest_startup_nop, 96 .selftest = trace_selftest_startup_nop,
96#endif 97#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ea9d3b410c7a..d72b9a63b247 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -19,6 +19,38 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19 19
20static int next_event_type = __TRACE_LAST_TYPE + 1; 20static int next_event_type = __TRACE_LAST_TYPE + 1;
21 21
22enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
23{
24 struct trace_seq *s = &iter->seq;
25 struct trace_entry *entry = iter->ent;
26 struct bprint_entry *field;
27 int ret;
28
29 trace_assign_type(field, entry);
30
31 ret = trace_seq_bprintf(s, field->fmt, field->buf);
32 if (!ret)
33 return TRACE_TYPE_PARTIAL_LINE;
34
35 return TRACE_TYPE_HANDLED;
36}
37
38enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
39{
40 struct trace_seq *s = &iter->seq;
41 struct trace_entry *entry = iter->ent;
42 struct print_entry *field;
43 int ret;
44
45 trace_assign_type(field, entry);
46
47 ret = trace_seq_printf(s, "%s", field->buf);
48 if (!ret)
49 return TRACE_TYPE_PARTIAL_LINE;
50
51 return TRACE_TYPE_HANDLED;
52}
53
22/** 54/**
23 * trace_seq_printf - sequence printing of trace information 55 * trace_seq_printf - sequence printing of trace information
24 * @s: trace sequence descriptor 56 * @s: trace sequence descriptor
@@ -105,7 +137,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
105 return 1; 137 return 1;
106} 138}
107 139
108int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) 140int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
109{ 141{
110 if (len > ((PAGE_SIZE - 1) - s->len)) 142 if (len > ((PAGE_SIZE - 1) - s->len))
111 return 0; 143 return 0;
@@ -116,10 +148,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
116 return len; 148 return len;
117} 149}
118 150
119int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 151int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
120{ 152{
121 unsigned char hex[HEX_CHARS]; 153 unsigned char hex[HEX_CHARS];
122 unsigned char *data = mem; 154 const unsigned char *data = mem;
123 int i, j; 155 int i, j;
124 156
125#ifdef __BIG_ENDIAN 157#ifdef __BIG_ENDIAN
@@ -135,6 +167,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
135 return trace_seq_putmem(s, hex, j); 167 return trace_seq_putmem(s, hex, j);
136} 168}
137 169
170void *trace_seq_reserve(struct trace_seq *s, size_t len)
171{
172 void *ret;
173
174 if (len > ((PAGE_SIZE - 1) - s->len))
175 return NULL;
176
177 ret = s->buffer + s->len;
178 s->len += len;
179
180 return ret;
181}
182
138int trace_seq_path(struct trace_seq *s, struct path *path) 183int trace_seq_path(struct trace_seq *s, struct path *path)
139{ 184{
140 unsigned char *p; 185 unsigned char *p;
@@ -309,9 +354,9 @@ static int
309lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) 354lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
310{ 355{
311 int hardirq, softirq; 356 int hardirq, softirq;
312 char *comm; 357 char comm[TASK_COMM_LEN];
313 358
314 comm = trace_find_cmdline(entry->pid); 359 trace_find_cmdline(entry->pid, comm);
315 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 360 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
316 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 361 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
317 362
@@ -346,10 +391,12 @@ int trace_print_context(struct trace_iterator *iter)
346{ 391{
347 struct trace_seq *s = &iter->seq; 392 struct trace_seq *s = &iter->seq;
348 struct trace_entry *entry = iter->ent; 393 struct trace_entry *entry = iter->ent;
349 char *comm = trace_find_cmdline(entry->pid);
350 unsigned long long t = ns2usecs(iter->ts); 394 unsigned long long t = ns2usecs(iter->ts);
351 unsigned long usec_rem = do_div(t, USEC_PER_SEC); 395 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
352 unsigned long secs = (unsigned long)t; 396 unsigned long secs = (unsigned long)t;
397 char comm[TASK_COMM_LEN];
398
399 trace_find_cmdline(entry->pid, comm);
353 400
354 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ", 401 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
355 comm, entry->pid, iter->cpu, secs, usec_rem); 402 comm, entry->pid, iter->cpu, secs, usec_rem);
@@ -372,7 +419,10 @@ int trace_print_lat_context(struct trace_iterator *iter)
372 rel_usecs = ns2usecs(next_ts - iter->ts); 419 rel_usecs = ns2usecs(next_ts - iter->ts);
373 420
374 if (verbose) { 421 if (verbose) {
375 char *comm = trace_find_cmdline(entry->pid); 422 char comm[TASK_COMM_LEN];
423
424 trace_find_cmdline(entry->pid, comm);
425
376 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" 426 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
377 " %ld.%03ldms (+%ld.%03ldms): ", comm, 427 " %ld.%03ldms (+%ld.%03ldms): ", comm,
378 entry->pid, iter->cpu, entry->flags, 428 entry->pid, iter->cpu, entry->flags,
@@ -444,6 +494,11 @@ int register_ftrace_event(struct trace_event *event)
444 494
445 mutex_lock(&trace_event_mutex); 495 mutex_lock(&trace_event_mutex);
446 496
497 if (!event) {
498 ret = next_event_type++;
499 goto out;
500 }
501
447 if (!event->type) 502 if (!event->type)
448 event->type = next_event_type++; 503 event->type = next_event_type++;
449 else if (event->type > __TRACE_LAST_TYPE) { 504 else if (event->type > __TRACE_LAST_TYPE) {
@@ -577,14 +632,15 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
577 char *delim) 632 char *delim)
578{ 633{
579 struct ctx_switch_entry *field; 634 struct ctx_switch_entry *field;
580 char *comm; 635 char comm[TASK_COMM_LEN];
581 int S, T; 636 int S, T;
582 637
638
583 trace_assign_type(field, iter->ent); 639 trace_assign_type(field, iter->ent);
584 640
585 T = task_state_char(field->next_state); 641 T = task_state_char(field->next_state);
586 S = task_state_char(field->prev_state); 642 S = task_state_char(field->prev_state);
587 comm = trace_find_cmdline(field->next_pid); 643 trace_find_cmdline(field->next_pid, comm);
588 if (!trace_seq_printf(&iter->seq, 644 if (!trace_seq_printf(&iter->seq,
589 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", 645 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
590 field->prev_pid, 646 field->prev_pid,
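trace_seq_reserve(), added above, lets binary output paths claim raw space in the page-sized seq buffer instead of formatting text. A hedged usage sketch; the emitted value, its type and the helper name are illustrative only.

/* Illustrative only: reserve space for a raw 64-bit value and fill it
 * in place; NULL means the PAGE_SIZE buffer is already full. */
static int example_emit_raw_u64(struct trace_seq *s, u64 value)
{
	u64 *slot;

	slot = trace_seq_reserve(s, sizeof(*slot));
	if (!slot)
		return 0;	/* caller treats 0 as "buffer full" */

	*slot = value;
	return 1;
}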
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 3b90e6ade1aa..e0bde39c2dd9 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -15,6 +15,11 @@ struct trace_event {
15 trace_print_func binary; 15 trace_print_func binary;
16}; 16};
17 17
18extern enum print_line_t
19trace_print_bprintk_msg_only(struct trace_iterator *iter);
20extern enum print_line_t
21trace_print_printk_msg_only(struct trace_iterator *iter);
22
18extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 23extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3))); 24 __attribute__ ((format (printf, 2, 3)));
20extern int 25extern int
@@ -24,24 +29,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
24 unsigned long sym_flags); 29 unsigned long sym_flags);
25extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 30extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
26 size_t cnt); 31 size_t cnt);
27int trace_seq_puts(struct trace_seq *s, const char *str); 32extern int trace_seq_puts(struct trace_seq *s, const char *str);
28int trace_seq_putc(struct trace_seq *s, unsigned char c); 33extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
29int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); 34extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
30int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); 35extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
31int trace_seq_path(struct trace_seq *s, struct path *path); 36 size_t len);
32int seq_print_userip_objs(const struct userstack_entry *entry, 37extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
33 struct trace_seq *s, unsigned long sym_flags); 38extern int trace_seq_path(struct trace_seq *s, struct path *path);
34int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, 39extern int seq_print_userip_objs(const struct userstack_entry *entry,
35 unsigned long ip, unsigned long sym_flags); 40 struct trace_seq *s, unsigned long sym_flags);
36 41extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
37int trace_print_context(struct trace_iterator *iter); 42 unsigned long ip, unsigned long sym_flags);
38int trace_print_lat_context(struct trace_iterator *iter); 43
39 44extern int trace_print_context(struct trace_iterator *iter);
40struct trace_event *ftrace_find_event(int type); 45extern int trace_print_lat_context(struct trace_iterator *iter);
41int register_ftrace_event(struct trace_event *event); 46
42int unregister_ftrace_event(struct trace_event *event); 47extern struct trace_event *ftrace_find_event(int type);
43 48extern int register_ftrace_event(struct trace_event *event);
44enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); 49extern int unregister_ftrace_event(struct trace_event *event);
50
51extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
52 int flags);
45 53
46#define MAX_MEMHEX_BYTES 8 54#define MAX_MEMHEX_BYTES 8
47#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) 55#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 91ce672fb037..bae791ebcc51 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -122,12 +122,16 @@ fail_start:
122static void start_power_trace(struct trace_array *tr) 122static void start_power_trace(struct trace_array *tr)
123{ 123{
124 trace_power_enabled = 1; 124 trace_power_enabled = 1;
125 tracing_power_register();
126} 125}
127 126
128static void stop_power_trace(struct trace_array *tr) 127static void stop_power_trace(struct trace_array *tr)
129{ 128{
130 trace_power_enabled = 0; 129 trace_power_enabled = 0;
130}
131
132static void power_trace_reset(struct trace_array *tr)
133{
134 trace_power_enabled = 0;
131 unregister_trace_power_start(probe_power_start); 135 unregister_trace_power_start(probe_power_start);
132 unregister_trace_power_end(probe_power_end); 136 unregister_trace_power_end(probe_power_end);
133 unregister_trace_power_mark(probe_power_mark); 137 unregister_trace_power_mark(probe_power_mark);
@@ -188,7 +192,7 @@ static struct tracer power_tracer __read_mostly =
188 .init = power_trace_init, 192 .init = power_trace_init,
189 .start = start_power_trace, 193 .start = start_power_trace,
190 .stop = stop_power_trace, 194 .stop = stop_power_trace,
191 .reset = stop_power_trace, 195 .reset = power_trace_reset,
192 .print_line = power_print_line, 196 .print_line = power_print_line,
193}; 197};
194 198
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 486785214e3e..eb81556107fe 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -112,7 +112,7 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...)
112 return 0; 112 return 0;
113 113
114 va_start(ap, fmt); 114 va_start(ap, fmt);
115 ret = trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); 115 ret = trace_vbprintk(ip, fmt, ap);
116 va_end(ap); 116 va_end(ap);
117 return ret; 117 return ret;
118} 118}
@@ -126,7 +126,7 @@ int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap)
126 if (!(trace_flags & TRACE_ITER_PRINTK)) 126 if (!(trace_flags & TRACE_ITER_PRINTK))
127 return 0; 127 return 0;
128 128
129 return trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); 129 return trace_vbprintk(ip, fmt, ap);
130} 130}
131EXPORT_SYMBOL_GPL(__ftrace_vbprintk); 131EXPORT_SYMBOL_GPL(__ftrace_vbprintk);
132 132
@@ -139,7 +139,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...)
139 return 0; 139 return 0;
140 140
141 va_start(ap, fmt); 141 va_start(ap, fmt);
142 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); 142 ret = trace_vprintk(ip, fmt, ap);
143 va_end(ap); 143 va_end(ap);
144 return ret; 144 return ret;
145} 145}
@@ -150,7 +150,7 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
150 if (!(trace_flags & TRACE_ITER_PRINTK)) 150 if (!(trace_flags & TRACE_ITER_PRINTK))
151 return 0; 151 return 0;
152 152
153 return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); 153 return trace_vprintk(ip, fmt, ap);
154} 154}
155EXPORT_SYMBOL_GPL(__ftrace_vprintk); 155EXPORT_SYMBOL_GPL(__ftrace_vprintk);
156 156
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 77132c2cf3d9..de35f200abd3 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -18,6 +18,7 @@ static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static int sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped;
21 22
22static void 23static void
23probe_sched_switch(struct rq *__rq, struct task_struct *prev, 24probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -28,7 +29,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
28 int cpu; 29 int cpu;
29 int pc; 30 int pc;
30 31
31 if (!sched_ref) 32 if (!sched_ref || sched_stopped)
32 return; 33 return;
33 34
34 tracing_record_cmdline(prev); 35 tracing_record_cmdline(prev);
@@ -193,6 +194,7 @@ static void stop_sched_trace(struct trace_array *tr)
193static int sched_switch_trace_init(struct trace_array *tr) 194static int sched_switch_trace_init(struct trace_array *tr)
194{ 195{
195 ctx_trace = tr; 196 ctx_trace = tr;
197 tracing_reset_online_cpus(tr);
196 tracing_start_sched_switch_record(); 198 tracing_start_sched_switch_record();
197 return 0; 199 return 0;
198} 200}
@@ -205,13 +207,12 @@ static void sched_switch_trace_reset(struct trace_array *tr)
205 207
206static void sched_switch_trace_start(struct trace_array *tr) 208static void sched_switch_trace_start(struct trace_array *tr)
207{ 209{
208 tracing_reset_online_cpus(tr); 210 sched_stopped = 0;
209 tracing_start_sched_switch();
210} 211}
211 212
212static void sched_switch_trace_stop(struct trace_array *tr) 213static void sched_switch_trace_stop(struct trace_array *tr)
213{ 214{
214 tracing_stop_sched_switch(); 215 sched_stopped = 1;
215} 216}
216 217
217static struct tracer sched_switch_trace __read_mostly = 218static struct tracer sched_switch_trace __read_mostly =
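
In trace_sched_switch.c the ->start()/->stop() callbacks no longer reset the per-CPU buffers or tear the recording down; they only toggle the new sched_stopped flag that the probe checks, and the buffer reset moves to init time. Condensed from the hunks above (not a verbatim copy):

static int sched_stopped;	/* set by ->stop(), cleared by ->start() */

static void probe_sched_switch(struct rq *__rq, struct task_struct *prev,
			       struct task_struct *next)
{
	if (!sched_ref || sched_stopped)
		return;				/* nobody listening, or tracer paused */
	/* ... record the context switch ... */
}

static int sched_switch_trace_init(struct trace_array *tr)
{
	ctx_trace = tr;
	tracing_reset_online_cpus(tr);		/* buffers cleared once, at init */
	tracing_start_sched_switch_record();
	return 0;
}

static void sched_switch_trace_start(struct trace_array *tr)
{
	sched_stopped = 0;
}

static void sched_switch_trace_stop(struct trace_array *tr)
{
	sched_stopped = 1;
}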
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b91091267067..499d01c44cd1 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -250,6 +250,28 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
250 250
251 251
252#ifdef CONFIG_FUNCTION_GRAPH_TRACER 252#ifdef CONFIG_FUNCTION_GRAPH_TRACER
253
254/* Maximum number of functions to trace before diagnosing a hang */
255#define GRAPH_MAX_FUNC_TEST 100000000
256
257static void __ftrace_dump(bool disable_tracing);
258static unsigned int graph_hang_thresh;
259
260/* Wrap the real function entry probe to avoid possible hanging */
261static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
262{
263 /* This is harmlessly racy, we want to approximately detect a hang */
264 if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
265 ftrace_graph_stop();
266 printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
267 if (ftrace_dump_on_oops)
268 __ftrace_dump(false);
269 return 0;
270 }
271
272 return trace_graph_entry(trace);
273}
274
253/* 275/*
254 * Pretty much the same than for the function tracer from which the selftest 276 * Pretty much the same than for the function tracer from which the selftest
255 * has been borrowed. 277 * has been borrowed.
@@ -261,15 +283,29 @@ trace_selftest_startup_function_graph(struct tracer *trace,
261 int ret; 283 int ret;
262 unsigned long count; 284 unsigned long count;
263 285
264 ret = tracer_init(trace, tr); 286 /*
287 * Simulate the init() callback but we attach a watchdog callback
288 * to detect and recover from possible hangs
289 */
290 tracing_reset_online_cpus(tr);
291 ret = register_ftrace_graph(&trace_graph_return,
292 &trace_graph_entry_watchdog);
265 if (ret) { 293 if (ret) {
266 warn_failed_init_tracer(trace, ret); 294 warn_failed_init_tracer(trace, ret);
267 goto out; 295 goto out;
268 } 296 }
297 tracing_start_cmdline_record();
269 298
270 /* Sleep for a 1/10 of a second */ 299 /* Sleep for a 1/10 of a second */
271 msleep(100); 300 msleep(100);
272 301
302 /* Have we just recovered from a hang? */
303 if (graph_hang_thresh > GRAPH_MAX_FUNC_TEST) {
304 tracing_selftest_disabled = true;
305 ret = -1;
306 goto out;
307 }
308
273 tracing_stop(); 309 tracing_stop();
274 310
275 /* check the trace buffer */ 311 /* check the trace buffer */
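
Instead of the real entry probe, the function graph selftest now registers trace_graph_entry_watchdog(): it counts every traced function and, past GRAPH_MAX_FUNC_TEST, stops the graph tracer and optionally dumps the buffers, so a runaway selftest cannot hang the boot. After the msleep() the selftest rechecks the same counter to fail gracefully if a hang was detected and recovered. A stripped-down sketch of the wrapper pattern (names taken from the hunk, bodies condensed):

#define GRAPH_MAX_FUNC_TEST	100000000

static unsigned int graph_hang_thresh;

static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
{
	/* Racy on purpose: an approximate count is enough to spot a hang. */
	if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
		ftrace_graph_stop();		/* stop generating more work */
		printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
		return 0;			/* refuse this entry */
	}

	return trace_graph_entry(trace);	/* defer to the real probe */
}

/* The selftest then installs the wrapper in place of the real entry hook:
 *	register_ftrace_graph(&trace_graph_return, &trace_graph_entry_watchdog);
 */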
@@ -317,6 +353,14 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
317 local_irq_disable(); 353 local_irq_disable();
318 udelay(100); 354 udelay(100);
319 local_irq_enable(); 355 local_irq_enable();
356
357 /*
358 * Stop the tracer to avoid a warning subsequent
359 * to buffer flipping failure because tracing_stop()
360 * disables the tr and max buffers, making flipping impossible
361 * in case of parallel max irqs off latencies.
362 */
363 trace->stop(tr);
320 /* stop the tracing. */ 364 /* stop the tracing. */
321 tracing_stop(); 365 tracing_stop();
322 /* check both trace buffers */ 366 /* check both trace buffers */
@@ -371,6 +415,14 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
371 preempt_disable(); 415 preempt_disable();
372 udelay(100); 416 udelay(100);
373 preempt_enable(); 417 preempt_enable();
418
419 /*
420 * Stop the tracer to avoid a warning subsequent
421 * to buffer flipping failure because tracing_stop()
422 * disables the tr and max buffers, making flipping impossible
423 * in case of parallel max preempt off latencies.
424 */
425 trace->stop(tr);
374 /* stop the tracing. */ 426 /* stop the tracing. */
375 tracing_stop(); 427 tracing_stop();
376 /* check both trace buffers */ 428 /* check both trace buffers */
@@ -416,7 +468,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
416 ret = tracer_init(trace, tr); 468 ret = tracer_init(trace, tr);
417 if (ret) { 469 if (ret) {
418 warn_failed_init_tracer(trace, ret); 470 warn_failed_init_tracer(trace, ret);
419 goto out; 471 goto out_no_start;
420 } 472 }
421 473
422 /* reset the max latency */ 474 /* reset the max latency */
@@ -430,31 +482,35 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
430 /* reverse the order of preempt vs irqs */ 482 /* reverse the order of preempt vs irqs */
431 local_irq_enable(); 483 local_irq_enable();
432 484
485 /*
486 * Stop the tracer to avoid a warning subsequent
487 * to buffer flipping failure because tracing_stop()
488 * disables the tr and max buffers, making flipping impossible
489 * in case of parallel max irqs/preempt off latencies.
490 */
491 trace->stop(tr);
433 /* stop the tracing. */ 492 /* stop the tracing. */
434 tracing_stop(); 493 tracing_stop();
435 /* check both trace buffers */ 494 /* check both trace buffers */
436 ret = trace_test_buffer(tr, NULL); 495 ret = trace_test_buffer(tr, NULL);
437 if (ret) { 496 if (ret)
438 tracing_start();
439 goto out; 497 goto out;
440 }
441 498
442 ret = trace_test_buffer(&max_tr, &count); 499 ret = trace_test_buffer(&max_tr, &count);
443 if (ret) { 500 if (ret)
444 tracing_start();
445 goto out; 501 goto out;
446 }
447 502
448 if (!ret && !count) { 503 if (!ret && !count) {
449 printk(KERN_CONT ".. no entries found .."); 504 printk(KERN_CONT ".. no entries found ..");
450 ret = -1; 505 ret = -1;
451 tracing_start();
452 goto out; 506 goto out;
453 } 507 }
454 508
455 /* do the test by disabling interrupts first this time */ 509 /* do the test by disabling interrupts first this time */
456 tracing_max_latency = 0; 510 tracing_max_latency = 0;
457 tracing_start(); 511 tracing_start();
512 trace->start(tr);
513
458 preempt_disable(); 514 preempt_disable();
459 local_irq_disable(); 515 local_irq_disable();
460 udelay(100); 516 udelay(100);
@@ -462,6 +518,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
462 /* reverse the order of preempt vs irqs */ 518 /* reverse the order of preempt vs irqs */
463 local_irq_enable(); 519 local_irq_enable();
464 520
521 trace->stop(tr);
465 /* stop the tracing. */ 522 /* stop the tracing. */
466 tracing_stop(); 523 tracing_stop();
467 /* check both trace buffers */ 524 /* check both trace buffers */
@@ -477,9 +534,10 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
477 goto out; 534 goto out;
478 } 535 }
479 536
480 out: 537out:
481 trace->reset(tr);
482 tracing_start(); 538 tracing_start();
539out_no_start:
540 trace->reset(tr);
483 tracing_max_latency = save_max; 541 tracing_max_latency = save_max;
484 542
485 return ret; 543 return ret;
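
The irqsoff, preemptoff and preemptirqsoff selftests now call trace->stop(tr) before tracing_stop(), so the tracer is quiesced before the tr and max buffers are frozen and a late max-latency update cannot trip the buffer-swap warning. The preemptirqsoff test also collapses its error handling into two labels: out restarts tracing, out_no_start is used when tracer_init() failed and tracing was never started, and both funnel into trace->reset(tr). A condensed sketch of that layout, assuming the same helpers used above:

/* Sketch only: workload and second pass elided, error paths kept. */
static int selftest_layout_sketch(struct tracer *trace, struct trace_array *tr)
{
	unsigned long save_max = tracing_max_latency;
	int ret;

	ret = tracer_init(trace, tr);
	if (ret) {
		warn_failed_init_tracer(trace, ret);
		goto out_no_start;		/* tracing never started */
	}

	/* ... irqs-off / preempt-off workload ... */

	trace->stop(tr);			/* quiesce the tracer first */
	tracing_stop();
	ret = trace_test_buffer(tr, NULL);
	if (ret)
		goto out;			/* single exit path restarts tracing */

	/* ... second pass and max buffer check ... */

out:
	tracing_start();
out_no_start:
	trace->reset(tr);
	tracing_max_latency = save_max;
	return ret;
}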
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 39310e3434ee..acdebd771a93 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -75,7 +75,7 @@ static int stat_seq_init(struct tracer_stat_session *session)
75{ 75{
76 struct trace_stat_list *iter_entry, *new_entry; 76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts; 77 struct tracer_stat *ts = session->ts;
78 void *prev_stat; 78 void *stat;
79 int ret = 0; 79 int ret = 0;
80 int i; 80 int i;
81 81
@@ -85,6 +85,10 @@ static int stat_seq_init(struct tracer_stat_session *session)
85 if (!ts->stat_cmp) 85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp; 86 ts->stat_cmp = dummy_cmp;
87 87
88 stat = ts->stat_start();
89 if (!stat)
90 goto exit;
91
88 /* 92 /*
89 * The first entry. Actually this is the second, but the first 93 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless. 94 * one (the stat_list head) is pointless.
@@ -99,14 +103,19 @@ static int stat_seq_init(struct tracer_stat_session *session)
99 103
100 list_add(&new_entry->list, &session->stat_list); 104 list_add(&new_entry->list, &session->stat_list);
101 105
102 new_entry->stat = ts->stat_start(); 106 new_entry->stat = stat;
103 prev_stat = new_entry->stat;
104 107
105 /* 108 /*
106 * Iterate over the tracer stat entries and store them in a sorted 109 * Iterate over the tracer stat entries and store them in a sorted
107 * list. 110 * list.
108 */ 111 */
109 for (i = 1; ; i++) { 112 for (i = 1; ; i++) {
113 stat = ts->stat_next(stat, i);
114
115 /* End of insertion */
116 if (!stat)
117 break;
118
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); 119 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) { 120 if (!new_entry) {
112 ret = -ENOMEM; 121 ret = -ENOMEM;
@@ -114,31 +123,23 @@ static int stat_seq_init(struct tracer_stat_session *session)
114 } 123 }
115 124
116 INIT_LIST_HEAD(&new_entry->list); 125 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i); 126 new_entry->stat = stat;
118 127
119 /* End of insertion */ 128 list_for_each_entry_reverse(iter_entry, &session->stat_list,
120 if (!new_entry->stat) 129 list) {
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124 130
125 /* Insertion with a descendent sorting */ 131 /* Insertion with a descendent sorting */
126 if (ts->stat_cmp(new_entry->stat, 132 if (ts->stat_cmp(iter_entry->stat,
127 iter_entry->stat) > 0) { 133 new_entry->stat) >= 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132 134
133 /* The current smaller value */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list); 135 list_add(&new_entry->list, &iter_entry->list);
137 break; 136 break;
138 } 137 }
139 } 138 }
140 139
141 prev_stat = new_entry->stat; 140 /* The current larger value */
141 if (list_empty(&new_entry->list))
142 list_add(&new_entry->list, &session->stat_list);
142 } 143 }
143exit: 144exit:
144 mutex_unlock(&session->stat_mutex); 145 mutex_unlock(&session->stat_mutex);
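
stat_seq_init() now calls ts->stat_start() once up front and bails out if there is nothing to report, fetches each following entry with ts->stat_next() before allocating its node, and finds the insertion point by walking the already sorted list backwards with list_for_each_entry_reverse(), keeping a descending order. The same insertion step on a plain integer payload, as a self-contained sketch (struct stat_node and insert_desc are illustration-only names):

#include <linux/list.h>

struct stat_node {
	struct list_head	list;
	unsigned long		key;
};

/* Insert 'new' into a list kept in descending key order. */
static void insert_desc(struct list_head *head, struct stat_node *new)
{
	struct stat_node *iter;

	INIT_LIST_HEAD(&new->list);

	/* Scan from the tail: the first node >= new is the insertion point. */
	list_for_each_entry_reverse(iter, head, list) {
		if (iter->key >= new->key) {
			list_add(&new->list, &iter->list);	/* right after iter */
			return;
		}
	}

	/* Larger than everything seen so far: it becomes the new first entry. */
	list_add(&new->list, head);
}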
@@ -160,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
160 161
161 /* If we are in the beginning of the file, print the headers */ 162 /* If we are in the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers) 163 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s); 164 return SEQ_START_TOKEN;
164 165
165 return seq_list_start(&session->stat_list, *pos); 166 return seq_list_start(&session->stat_list, *pos);
166} 167}
@@ -169,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{ 170{
170 struct tracer_stat_session *session = s->private; 171 struct tracer_stat_session *session = s->private;
171 172
173 if (p == SEQ_START_TOKEN)
174 return seq_list_start(&session->stat_list, *pos);
175
172 return seq_list_next(p, &session->stat_list, pos); 176 return seq_list_next(p, &session->stat_list, pos);
173} 177}
174 178
@@ -183,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v)
183 struct tracer_stat_session *session = s->private; 187 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); 188 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185 189
190 if (v == SEQ_START_TOKEN)
191 return session->ts->stat_headers(s);
192
186 return session->ts->stat_show(s, l->stat); 193 return session->ts->stat_show(s, l->stat);
187} 194}
188 195
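
The headers used to be printed as a side effect of stat_seq_start(); the rework returns SEQ_START_TOKEN from ->start() instead, has ->next() translate the token into the first real list position, and lets ->show() print the header line when it sees the token. That is the usual seq_file idiom for one-off headers; a minimal sketch (the my_* names are placeholders):

#include <linux/seq_file.h>
#include <linux/list.h>

static LIST_HEAD(my_list);

static void *my_seq_start(struct seq_file *s, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;		/* first call: emit headers */

	return seq_list_start(&my_list, *pos);
}

static void *my_seq_next(struct seq_file *s, void *p, loff_t *pos)
{
	if (p == SEQ_START_TOKEN)
		return seq_list_start(&my_list, *pos);

	return seq_list_next(p, &my_list, pos);
}

static int my_seq_show(struct seq_file *s, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(s, "# column headers go here\n");
		return 0;
	}

	/* render one real entry from 'v' */
	return 0;
}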
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 66cf97449af3..a2a3af29c943 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,21 +1,112 @@
1#include <linux/ftrace.h>
2#include <linux/kernel.h> 1#include <linux/kernel.h>
3 2#include <linux/ftrace.h>
4#include <asm/syscall.h> 3#include <asm/syscall.h>
5 4
6#include "trace_output.h" 5#include "trace_output.h"
7#include "trace.h" 6#include "trace.h"
8 7
9static atomic_t refcount; 8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Prevent from races on thread flags toggling */
12static DEFINE_MUTEX(syscall_trace_lock);
13
14/* Option to display the parameters types */
15enum {
16 TRACE_SYSCALLS_OPT_TYPES = 0x1,
17};
18
19static struct tracer_opt syscalls_opts[] = {
20 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
21 { }
22};
23
24static struct tracer_flags syscalls_flags = {
25 .val = 0, /* By default: no parameters types */
26 .opts = syscalls_opts
27};
28
29enum print_line_t
30print_syscall_enter(struct trace_iterator *iter, int flags)
31{
32 struct trace_seq *s = &iter->seq;
33 struct trace_entry *ent = iter->ent;
34 struct syscall_trace_enter *trace;
35 struct syscall_metadata *entry;
36 int i, ret, syscall;
37
38 trace_assign_type(trace, ent);
39
40 syscall = trace->nr;
41
42 entry = syscall_nr_to_meta(syscall);
43 if (!entry)
44 goto end;
45
46 ret = trace_seq_printf(s, "%s(", entry->name);
47 if (!ret)
48 return TRACE_TYPE_PARTIAL_LINE;
49
50 for (i = 0; i < entry->nb_args; i++) {
51 /* parameter types */
52 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
53 ret = trace_seq_printf(s, "%s ", entry->types[i]);
54 if (!ret)
55 return TRACE_TYPE_PARTIAL_LINE;
56 }
57 /* parameter values */
58 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
59 trace->args[i],
60 i == entry->nb_args - 1 ? ")" : ",");
61 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE;
63 }
64
65end:
66 trace_seq_printf(s, "\n");
67 return TRACE_TYPE_HANDLED;
68}
69
70enum print_line_t
71print_syscall_exit(struct trace_iterator *iter, int flags)
72{
73 struct trace_seq *s = &iter->seq;
74 struct trace_entry *ent = iter->ent;
75 struct syscall_trace_exit *trace;
76 int syscall;
77 struct syscall_metadata *entry;
78 int ret;
79
80 trace_assign_type(trace, ent);
81
82 syscall = trace->nr;
83
84 entry = syscall_nr_to_meta(syscall);
85 if (!entry) {
86 trace_seq_printf(s, "\n");
87 return TRACE_TYPE_HANDLED;
88 }
89
90 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
91 trace->ret);
92 if (!ret)
93 return TRACE_TYPE_PARTIAL_LINE;
94
95 return TRACE_TYPE_HANDLED;
96}
10 97
11void start_ftrace_syscalls(void) 98void start_ftrace_syscalls(void)
12{ 99{
13 unsigned long flags; 100 unsigned long flags;
14 struct task_struct *g, *t; 101 struct task_struct *g, *t;
15 102
16 if (atomic_inc_return(&refcount) != 1) 103 mutex_lock(&syscall_trace_lock);
17 goto out;
18 104
105 /* Don't enable the flag on the tasks twice */
106 if (++refcount != 1)
107 goto unlock;
108
109 arch_init_ftrace_syscalls();
19 read_lock_irqsave(&tasklist_lock, flags); 110 read_lock_irqsave(&tasklist_lock, flags);
20 111
21 do_each_thread(g, t) { 112 do_each_thread(g, t) {
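
The rewritten trace_syscalls.c also gains a tracer option, syscall_arg_type, declared through the tracer_opt/tracer_flags machinery; print_syscall_enter() tests syscalls_flags.val to decide whether to print each parameter's type before its value. A hedged sketch of declaring and checking such an option (the my_* names are invented, the macros and structs are the ones used above):

/* Sketch of the tracer_opt / tracer_flags pattern. */
enum {
	MY_TRACER_OPT_VERBOSE = 0x1,
};

static struct tracer_opt my_opts[] = {
	{ TRACER_OPT(my_verbose, MY_TRACER_OPT_VERBOSE) },
	{ }					/* terminating entry */
};

static struct tracer_flags my_flags = {
	.val	= 0,				/* off by default */
	.opts	= my_opts,
};

/* In a print callback, the option becomes a simple bit test: */
static void maybe_print_type(struct trace_seq *s, const char *type)
{
	if (my_flags.val & MY_TRACER_OPT_VERBOSE)
		trace_seq_printf(s, "%s ", type);
}

/* The tracer exports the block via .flags = &my_flags, after which the option
 * name should show up in the trace_options file and can be toggled there. */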
@@ -23,8 +114,9 @@ void start_ftrace_syscalls(void)
23 } while_each_thread(g, t); 114 } while_each_thread(g, t);
24 115
25 read_unlock_irqrestore(&tasklist_lock, flags); 116 read_unlock_irqrestore(&tasklist_lock, flags);
26out: 117
27 atomic_dec(&refcount); 118unlock:
119 mutex_unlock(&syscall_trace_lock);
28} 120}
29 121
30void stop_ftrace_syscalls(void) 122void stop_ftrace_syscalls(void)
@@ -32,8 +124,11 @@ void stop_ftrace_syscalls(void)
32 unsigned long flags; 124 unsigned long flags;
33 struct task_struct *g, *t; 125 struct task_struct *g, *t;
34 126
35 if (atomic_dec_return(&refcount)) 127 mutex_lock(&syscall_trace_lock);
36 goto out; 128
129 /* There are perhaps still some users */
130 if (--refcount)
131 goto unlock;
37 132
38 read_lock_irqsave(&tasklist_lock, flags); 133 read_lock_irqsave(&tasklist_lock, flags);
39 134
@@ -42,26 +137,64 @@ void stop_ftrace_syscalls(void)
42 } while_each_thread(g, t); 137 } while_each_thread(g, t);
43 138
44 read_unlock_irqrestore(&tasklist_lock, flags); 139 read_unlock_irqrestore(&tasklist_lock, flags);
45out: 140
46 atomic_inc(&refcount); 141unlock:
142 mutex_unlock(&syscall_trace_lock);
47} 143}
48 144
49void ftrace_syscall_enter(struct pt_regs *regs) 145void ftrace_syscall_enter(struct pt_regs *regs)
50{ 146{
147 struct syscall_trace_enter *entry;
148 struct syscall_metadata *sys_data;
149 struct ring_buffer_event *event;
150 int size;
51 int syscall_nr; 151 int syscall_nr;
52 152
53 syscall_nr = syscall_get_nr(current, regs); 153 syscall_nr = syscall_get_nr(current, regs);
54 154
55 trace_printk("syscall %d enter\n", syscall_nr); 155 sys_data = syscall_nr_to_meta(syscall_nr);
156 if (!sys_data)
157 return;
158
159 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
160
161 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size,
162 0, 0);
163 if (!event)
164 return;
165
166 entry = ring_buffer_event_data(event);
167 entry->nr = syscall_nr;
168 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
169
170 trace_current_buffer_unlock_commit(event, 0, 0);
171 trace_wake_up();
56} 172}
57 173
58void ftrace_syscall_exit(struct pt_regs *regs) 174void ftrace_syscall_exit(struct pt_regs *regs)
59{ 175{
176 struct syscall_trace_exit *entry;
177 struct syscall_metadata *sys_data;
178 struct ring_buffer_event *event;
60 int syscall_nr; 179 int syscall_nr;
61 180
62 syscall_nr = syscall_get_nr(current, regs); 181 syscall_nr = syscall_get_nr(current, regs);
63 182
64 trace_printk("syscall %d exit\n", syscall_nr); 183 sys_data = syscall_nr_to_meta(syscall_nr);
184 if (!sys_data)
185 return;
186
187 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT,
188 sizeof(*entry), 0, 0);
189 if (!event)
190 return;
191
192 entry = ring_buffer_event_data(event);
193 entry->nr = syscall_nr;
194 entry->ret = syscall_get_return_value(current, regs);
195
196 trace_current_buffer_unlock_commit(event, 0, 0);
197 trace_wake_up();
65} 198}
66 199
67static int init_syscall_tracer(struct trace_array *tr) 200static int init_syscall_tracer(struct trace_array *tr)
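
ftrace_syscall_enter() now follows the usual ring buffer event sequence: look up the syscall metadata, size the record as a fixed header plus one unsigned long per argument, reserve an event of that size in the current buffer, fill it in place, then commit and wake any readers. The same flow, condensed from the hunk above with explanatory comments:

static void syscall_enter_sketch(struct pt_regs *regs)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	int size, syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;				/* no metadata: nothing to record */

	/* fixed header plus one word per argument */
	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, 0, 0);
	if (!event)
		return;				/* buffer unavailable, drop the event */

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	trace_current_buffer_unlock_commit(event, 0, 0);
	trace_wake_up();			/* poke readers blocked on trace_pipe */
}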
@@ -74,20 +207,24 @@ static int init_syscall_tracer(struct trace_array *tr)
74static void reset_syscall_tracer(struct trace_array *tr) 207static void reset_syscall_tracer(struct trace_array *tr)
75{ 208{
76 stop_ftrace_syscalls(); 209 stop_ftrace_syscalls();
210 tracing_reset_online_cpus(tr);
77} 211}
78 212
79static struct trace_event syscall_enter_event = { 213static struct trace_event syscall_enter_event = {
80 .type = TRACE_SYSCALL_ENTER, 214 .type = TRACE_SYSCALL_ENTER,
215 .trace = print_syscall_enter,
81}; 216};
82 217
83static struct trace_event syscall_exit_event = { 218static struct trace_event syscall_exit_event = {
84 .type = TRACE_SYSCALL_EXIT, 219 .type = TRACE_SYSCALL_EXIT,
220 .trace = print_syscall_exit,
85}; 221};
86 222
87static struct tracer syscall_tracer __read_mostly = { 223static struct tracer syscall_tracer __read_mostly = {
88 .name = "syscall", 224 .name = "syscall",
89 .init = init_syscall_tracer, 225 .init = init_syscall_tracer,
90 .reset = reset_syscall_tracer 226 .reset = reset_syscall_tracer,
227 .flags = &syscalls_flags,
91}; 228};
92 229
93__init int register_ftrace_syscalls(void) 230__init int register_ftrace_syscalls(void)
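
The final hunks wire the output side together: each struct trace_event gains a .trace callback (print_syscall_enter / print_syscall_exit) so the generic output code can render the raw ring buffer entries, reset_syscall_tracer() also clears the per-CPU buffers, and the tracer advertises its options through .flags. A small sketch of a trace_event with a custom renderer; the my_* names are illustrative, the types and return conventions are the ones used in this file:

static enum print_line_t my_event_print(struct trace_iterator *iter, int flags)
{
	if (!trace_seq_printf(&iter->seq, "my event\n"))
		return TRACE_TYPE_PARTIAL_LINE;	/* seq buffer full, retry later */

	return TRACE_TYPE_HANDLED;
}

static struct trace_event my_event = {
	.type	= TRACE_SYSCALL_ENTER,		/* reusing an existing id, illustration only */
	.trace	= my_event_print,
};

/* registered once at init time:
 *	register_ftrace_event(&my_event);
 */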
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 9ab035b58cf1..797201e4a137 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
196 struct pid *pid; 196 struct pid *pid;
197 struct task_struct *tsk; 197 struct task_struct *tsk;
198 198
199 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
200 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
201 seq_printf(s, "\n");
202 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
203
199 pid = find_get_pid(cws->pid); 204 pid = find_get_pid(cws->pid);
200 if (pid) { 205 if (pid) {
201 tsk = get_pid_task(pid, PIDTYPE_PID); 206 tsk = get_pid_task(pid, PIDTYPE_PID);
@@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
208 put_pid(pid); 213 put_pid(pid);
209 } 214 }
210 215
211 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
212 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
213 seq_printf(s, "\n");
214 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
215
216 return 0; 216 return 0;
217} 217}
218 218
219static int workqueue_stat_headers(struct seq_file *s) 219static int workqueue_stat_headers(struct seq_file *s)
220{ 220{
221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
222 seq_printf(s, "# | | | |\n\n"); 222 seq_printf(s, "# | | | |\n");
223 return 0; 223 return 0;
224} 224}
225 225
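
In trace_workqueue.c the blank line that separates one CPU's entries from the next is now printed at the top of workqueue_stat_show(), before the pid lookup, and only when the entry is the first node of that CPU's list; the doubled newline is dropped from the headers accordingly. A sketch of that first-entry check, factored into a helper for clarity (the helper name is invented, the lock and list accessors are the ones used above):

static void print_cpu_separator(struct seq_file *s, struct list_head *entry, int cpu)
{
	unsigned long flags;

	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
	/* First node of this CPU's list: start a new visual block. */
	if (entry == workqueue_cpu_stat(cpu)->list.next)
		seq_printf(s, "\n");
	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
}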