aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-09-07 02:19:51 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-09-07 02:19:51 -0400
commit a1922ed661ab2c1637d0b10cde933bd9cd33d965 (patch)
tree 0f1777542b385ebefd30b3586d830fd8ed6fda5b /kernel/trace
parent 75e33751ca8bbb72dd6f1a74d2810ddc8cbe4bdf (diff)
parent d28daf923ac5e4a0d7cecebae56f3e339189366b (diff)
Merge branch 'tracing/core' into tracing/hw-breakpoints
Conflicts:
	arch/Kconfig
	kernel/trace/trace.h

Merge reason: resolve the conflicts, plus adapt to the new ring-buffer APIs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- kernel/trace/Kconfig 27
-rw-r--r-- kernel/trace/blktrace.c 25
-rw-r--r-- kernel/trace/ftrace.c 215
-rw-r--r-- kernel/trace/kmemtrace.c 149
-rw-r--r-- kernel/trace/ring_buffer.c 1437
-rw-r--r-- kernel/trace/ring_buffer_benchmark.c 45
-rw-r--r-- kernel/trace/trace.c 736
-rw-r--r-- kernel/trace/trace.h 87
-rw-r--r-- kernel/trace/trace_boot.c 16
-rw-r--r-- kernel/trace/trace_event_profile.c 2
-rw-r--r-- kernel/trace/trace_event_types.h 3
-rw-r--r-- kernel/trace/trace_events.c 178
-rw-r--r-- kernel/trace/trace_events_filter.c 292
-rw-r--r-- kernel/trace/trace_export.c 28
-rw-r--r-- kernel/trace/trace_functions.c 17
-rw-r--r-- kernel/trace/trace_functions_graph.c 213
-rw-r--r-- kernel/trace/trace_irqsoff.c 3
-rw-r--r-- kernel/trace/trace_mmiotrace.c 10
-rw-r--r-- kernel/trace/trace_output.c 3
-rw-r--r-- kernel/trace/trace_power.c 22
-rw-r--r-- kernel/trace/trace_printk.c 28
-rw-r--r-- kernel/trace/trace_sched_switch.c 59
-rw-r--r-- kernel/trace/trace_sched_wakeup.c 7
-rw-r--r-- kernel/trace/trace_selftest.c 1
-rw-r--r-- kernel/trace/trace_stack.c 54
-rw-r--r-- kernel/trace/trace_stat.c 53
-rw-r--r-- kernel/trace/trace_stat.h 2
-rw-r--r-- kernel/trace/trace_syscalls.c 471
-rw-r--r-- kernel/trace/trace_workqueue.c 32
29 files changed, 2829 insertions, 1386 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ae048a2dbbe8..5efeb4229ea0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
18config HAVE_FUNCTION_GRAPH_TRACER 18config HAVE_FUNCTION_GRAPH_TRACER
19 bool 19 bool
20 20
21config HAVE_FUNCTION_GRAPH_FP_TEST
22 bool
23 help
24 An arch may pass in a unique value (frame pointer) to both the
25 entering and exiting of a function. On exit, the value is compared
26 and if it does not match, then it will panic the kernel.
27
21config HAVE_FUNCTION_TRACE_MCOUNT_TEST 28config HAVE_FUNCTION_TRACE_MCOUNT_TEST
22 bool 29 bool
23 help 30 help
@@ -34,7 +41,7 @@ config HAVE_FTRACE_MCOUNT_RECORD
34config HAVE_HW_BRANCH_TRACER 41config HAVE_HW_BRANCH_TRACER
35 bool 42 bool
36 43
37config HAVE_FTRACE_SYSCALLS 44config HAVE_SYSCALL_TRACEPOINTS
38 bool 45 bool
39 46
40config TRACER_MAX_TRACE 47config TRACER_MAX_TRACE
@@ -53,9 +60,14 @@ config EVENT_TRACING
53 bool 60 bool
54 61
55config CONTEXT_SWITCH_TRACER 62config CONTEXT_SWITCH_TRACER
56 select MARKERS
57 bool 63 bool
58 64
65config RING_BUFFER_ALLOW_SWAP
66 bool
67 help
68 Allow the use of ring_buffer_swap_cpu.
69 Adds a very slight overhead to tracing when enabled.
70
59# All tracer options should select GENERIC_TRACER. For those options that are 71# All tracer options should select GENERIC_TRACER. For those options that are
60# enabled by all tracers (context switch and event tracer) they select TRACING. 72# enabled by all tracers (context switch and event tracer) they select TRACING.
61# This allows those options to appear when no other tracer is selected. But the 73# This allows those options to appear when no other tracer is selected. But the
@@ -121,6 +133,7 @@ config FUNCTION_GRAPH_TRACER
121 bool "Kernel Function Graph Tracer" 133 bool "Kernel Function Graph Tracer"
122 depends on HAVE_FUNCTION_GRAPH_TRACER 134 depends on HAVE_FUNCTION_GRAPH_TRACER
123 depends on FUNCTION_TRACER 135 depends on FUNCTION_TRACER
136 depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
124 default y 137 default y
125 help 138 help
126 Enable the kernel to trace a function at both its return 139 Enable the kernel to trace a function at both its return
@@ -139,6 +152,7 @@ config IRQSOFF_TRACER
139 select TRACE_IRQFLAGS 152 select TRACE_IRQFLAGS
140 select GENERIC_TRACER 153 select GENERIC_TRACER
141 select TRACER_MAX_TRACE 154 select TRACER_MAX_TRACE
155 select RING_BUFFER_ALLOW_SWAP
142 help 156 help
143 This option measures the time spent in irqs-off critical 157 This option measures the time spent in irqs-off critical
144 sections, with microsecond accuracy. 158 sections, with microsecond accuracy.
@@ -160,6 +174,7 @@ config PREEMPT_TRACER
160 depends on PREEMPT 174 depends on PREEMPT
161 select GENERIC_TRACER 175 select GENERIC_TRACER
162 select TRACER_MAX_TRACE 176 select TRACER_MAX_TRACE
177 select RING_BUFFER_ALLOW_SWAP
163 help 178 help
164 This option measures the time spent in preemption off critical 179 This option measures the time spent in preemption off critical
165 sections, with microsecond accuracy. 180 sections, with microsecond accuracy.
@@ -203,7 +218,7 @@ config ENABLE_DEFAULT_TRACERS
203 218
204config FTRACE_SYSCALLS 219config FTRACE_SYSCALLS
205 bool "Trace syscalls" 220 bool "Trace syscalls"
206 depends on HAVE_FTRACE_SYSCALLS 221 depends on HAVE_SYSCALL_TRACEPOINTS
207 select GENERIC_TRACER 222 select GENERIC_TRACER
208 select KALLSYMS 223 select KALLSYMS
209 help 224 help
@@ -218,13 +233,13 @@ config BOOT_TRACER
218 the timings of the initcalls and traces key events and the identity 233 the timings of the initcalls and traces key events and the identity
219 of tasks that can cause boot delays, such as context-switches. 234 of tasks that can cause boot delays, such as context-switches.
220 235
221 Its aim is to be parsed by the /scripts/bootgraph.pl tool to 236 Its aim is to be parsed by the scripts/bootgraph.pl tool to
222 produce pretty graphics about boot inefficiencies, giving a visual 237 produce pretty graphics about boot inefficiencies, giving a visual
223 representation of the delays during initcalls - but the raw 238 representation of the delays during initcalls - but the raw
224 /debug/tracing/trace text output is readable too. 239 /debug/tracing/trace text output is readable too.
225 240
226 You must pass in ftrace=initcall to the kernel command line 241 You must pass in initcall_debug and ftrace=initcall to the kernel
227 to enable this on bootup. 242 command line to enable this on bootup.
228 243
229config TRACE_BRANCH_PROFILING 244config TRACE_BRANCH_PROFILING
230 bool 245 bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc30..3eb159c277c8 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/smp_lock.h>
25#include <linux/time.h> 26#include <linux/time.h>
26#include <linux/uaccess.h> 27#include <linux/uaccess.h>
27 28
@@ -64,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
64{ 65{
65 struct blk_io_trace *t; 66 struct blk_io_trace *t;
66 struct ring_buffer_event *event = NULL; 67 struct ring_buffer_event *event = NULL;
68 struct ring_buffer *buffer = NULL;
67 int pc = 0; 69 int pc = 0;
68 int cpu = smp_processor_id(); 70 int cpu = smp_processor_id();
69 bool blk_tracer = blk_tracer_enabled; 71 bool blk_tracer = blk_tracer_enabled;
70 72
71 if (blk_tracer) { 73 if (blk_tracer) {
74 buffer = blk_tr->buffer;
72 pc = preempt_count(); 75 pc = preempt_count();
73 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 76 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
74 sizeof(*t) + len, 77 sizeof(*t) + len,
75 0, pc); 78 0, pc);
76 if (!event) 79 if (!event)
@@ -95,7 +98,7 @@ record_it:
95 memcpy((void *) t + sizeof(*t), data, len); 98 memcpy((void *) t + sizeof(*t), data, len);
96 99
97 if (blk_tracer) 100 if (blk_tracer)
98 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 101 trace_buffer_unlock_commit(buffer, event, 0, pc);
99 } 102 }
100} 103}
101 104
@@ -178,6 +181,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
178{ 181{
179 struct task_struct *tsk = current; 182 struct task_struct *tsk = current;
180 struct ring_buffer_event *event = NULL; 183 struct ring_buffer_event *event = NULL;
184 struct ring_buffer *buffer = NULL;
181 struct blk_io_trace *t; 185 struct blk_io_trace *t;
182 unsigned long flags = 0; 186 unsigned long flags = 0;
183 unsigned long *sequence; 187 unsigned long *sequence;
@@ -203,8 +207,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
203 if (blk_tracer) { 207 if (blk_tracer) {
204 tracing_record_cmdline(current); 208 tracing_record_cmdline(current);
205 209
210 buffer = blk_tr->buffer;
206 pc = preempt_count(); 211 pc = preempt_count();
207 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 212 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
208 sizeof(*t) + pdu_len, 213 sizeof(*t) + pdu_len,
209 0, pc); 214 0, pc);
210 if (!event) 215 if (!event)
@@ -251,7 +256,7 @@ record_it:
251 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 256 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
252 257
253 if (blk_tracer) { 258 if (blk_tracer) {
254 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 259 trace_buffer_unlock_commit(buffer, event, 0, pc);
255 return; 260 return;
256 } 261 }
257 } 262 }
@@ -266,8 +271,8 @@ static void blk_trace_free(struct blk_trace *bt)
266{ 271{
267 debugfs_remove(bt->msg_file); 272 debugfs_remove(bt->msg_file);
268 debugfs_remove(bt->dropped_file); 273 debugfs_remove(bt->dropped_file);
269 debugfs_remove(bt->dir);
270 relay_close(bt->rchan); 274 relay_close(bt->rchan);
275 debugfs_remove(bt->dir);
271 free_percpu(bt->sequence); 276 free_percpu(bt->sequence);
272 free_percpu(bt->msg_data); 277 free_percpu(bt->msg_data);
273 kfree(bt); 278 kfree(bt);
@@ -377,18 +382,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
377 382
378static int blk_remove_buf_file_callback(struct dentry *dentry) 383static int blk_remove_buf_file_callback(struct dentry *dentry)
379{ 384{
380 struct dentry *parent = dentry->d_parent;
381 debugfs_remove(dentry); 385 debugfs_remove(dentry);
382 386
383 /*
384 * this will fail for all but the last file, but that is ok. what we
385 * care about is the top level buts->name directory going away, when
386 * the last trace file is gone. Then we don't have to rmdir() that
387 * manually on trace stop, so it nicely solves the issue with
388 * force killing of running traces.
389 */
390
391 debugfs_remove(parent);
392 return 0; 387 return 0;
393} 388}
394 389
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb60732ade0c..8c804e24f96f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -291,7 +291,9 @@ function_stat_next(void *v, int idx)
291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); 291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
292 292
293 again: 293 again:
294 rec++; 294 if (idx != 0)
295 rec++;
296
295 if ((void *)rec >= (void *)&pg->records[pg->index]) { 297 if ((void *)rec >= (void *)&pg->records[pg->index]) {
296 pg = pg->next; 298 pg = pg->next;
297 if (!pg) 299 if (!pg)
@@ -766,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
766 .stat_show = function_stat_show 768 .stat_show = function_stat_show
767}; 769};
768 770
769static void ftrace_profile_debugfs(struct dentry *d_tracer) 771static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
770{ 772{
771 struct ftrace_profile_stat *stat; 773 struct ftrace_profile_stat *stat;
772 struct dentry *entry; 774 struct dentry *entry;
@@ -784,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
784 * The files created are permanent, if something happens 786 * The files created are permanent, if something happens
785 * we still do not free memory. 787 * we still do not free memory.
786 */ 788 */
787 kfree(stat);
788 WARN(1, 789 WARN(1,
789 "Could not allocate stat file for cpu %d\n", 790 "Could not allocate stat file for cpu %d\n",
790 cpu); 791 cpu);
@@ -811,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
811} 812}
812 813
813#else /* CONFIG_FUNCTION_PROFILER */ 814#else /* CONFIG_FUNCTION_PROFILER */
814static void ftrace_profile_debugfs(struct dentry *d_tracer) 815static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
815{ 816{
816} 817}
817#endif /* CONFIG_FUNCTION_PROFILER */ 818#endif /* CONFIG_FUNCTION_PROFILER */
@@ -1015,71 +1016,35 @@ static int
1015__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1016{ 1017{
1017 unsigned long ftrace_addr; 1018 unsigned long ftrace_addr;
1018 unsigned long ip, fl; 1019 unsigned long flag = 0UL;
1019 1020
1020 ftrace_addr = (unsigned long)FTRACE_ADDR; 1021 ftrace_addr = (unsigned long)FTRACE_ADDR;
1021 1022
1022 ip = rec->ip;
1023
1024 /* 1023 /*
1025 * If this record is not to be traced and 1024 * If this record is not to be traced or we want to disable it,
1026 * it is not enabled then do nothing. 1025 * then disable it.
1027 * 1026 *
1028 * If this record is not to be traced and 1027 * If we want to enable it and filtering is off, then enable it.
1029 * it is enabled then disable it.
1030 * 1028 *
1029 * If we want to enable it and filtering is on, enable it only if
1030 * it's filtered
1031 */ 1031 */
1032 if (rec->flags & FTRACE_FL_NOTRACE) { 1032 if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) {
1033 if (rec->flags & FTRACE_FL_ENABLED) 1033 if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER))
1034 rec->flags &= ~FTRACE_FL_ENABLED; 1034 flag = FTRACE_FL_ENABLED;
1035 else 1035 }
1036 return 0;
1037
1038 } else if (ftrace_filtered && enable) {
1039 /*
1040 * Filtering is on:
1041 */
1042
1043 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
1044
1045 /* Record is filtered and enabled, do nothing */
1046 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
1047 return 0;
1048
1049 /* Record is not filtered or enabled, do nothing */
1050 if (!fl)
1051 return 0;
1052
1053 /* Record is not filtered but enabled, disable it */
1054 if (fl == FTRACE_FL_ENABLED)
1055 rec->flags &= ~FTRACE_FL_ENABLED;
1056 else
1057 /* Otherwise record is filtered but not enabled, enable it */
1058 rec->flags |= FTRACE_FL_ENABLED;
1059 } else {
1060 /* Disable or not filtered */
1061
1062 if (enable) {
1063 /* if record is enabled, do nothing */
1064 if (rec->flags & FTRACE_FL_ENABLED)
1065 return 0;
1066
1067 rec->flags |= FTRACE_FL_ENABLED;
1068
1069 } else {
1070 1036
1071 /* if record is not enabled, do nothing */ 1037 /* If the state of this record hasn't changed, then do nothing */
1072 if (!(rec->flags & FTRACE_FL_ENABLED)) 1038 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1073 return 0; 1039 return 0;
1074 1040
1075 rec->flags &= ~FTRACE_FL_ENABLED; 1041 if (flag) {
1076 } 1042 rec->flags |= FTRACE_FL_ENABLED;
1043 return ftrace_make_call(rec, ftrace_addr);
1077 } 1044 }
1078 1045
1079 if (rec->flags & FTRACE_FL_ENABLED) 1046 rec->flags &= ~FTRACE_FL_ENABLED;
1080 return ftrace_make_call(rec, ftrace_addr); 1047 return ftrace_make_nop(NULL, rec, ftrace_addr);
1081 else
1082 return ftrace_make_nop(NULL, rec, ftrace_addr);
1083} 1048}
1084 1049
1085static void ftrace_replace_code(int enable) 1050static void ftrace_replace_code(int enable)
@@ -1224,6 +1189,13 @@ static void ftrace_shutdown(int command)
1224 return; 1189 return;
1225 1190
1226 ftrace_start_up--; 1191 ftrace_start_up--;
1192 /*
1193 * Just warn in case of unbalance, no need to kill ftrace, it's not
1194 * critical but the ftrace_call callers may be never nopped again after
1195 * further ftrace uses.
1196 */
1197 WARN_ON_ONCE(ftrace_start_up < 0);
1198
1227 if (!ftrace_start_up) 1199 if (!ftrace_start_up)
1228 command |= FTRACE_DISABLE_CALLS; 1200 command |= FTRACE_DISABLE_CALLS;
1229 1201
@@ -1367,7 +1339,6 @@ struct ftrace_iterator {
1367 unsigned flags; 1339 unsigned flags;
1368 unsigned char buffer[FTRACE_BUFF_MAX+1]; 1340 unsigned char buffer[FTRACE_BUFF_MAX+1];
1369 unsigned buffer_idx; 1341 unsigned buffer_idx;
1370 unsigned filtered;
1371}; 1342};
1372 1343
1373static void * 1344static void *
@@ -1410,28 +1381,33 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1410{ 1381{
1411 struct ftrace_iterator *iter = m->private; 1382 struct ftrace_iterator *iter = m->private;
1412 void *p = NULL; 1383 void *p = NULL;
1384 loff_t l;
1385
1386 if (!(iter->flags & FTRACE_ITER_HASH))
1387 *pos = 0;
1413 1388
1414 iter->flags |= FTRACE_ITER_HASH; 1389 iter->flags |= FTRACE_ITER_HASH;
1415 1390
1416 return t_hash_next(m, p, pos); 1391 iter->hidx = 0;
1392 for (l = 0; l <= *pos; ) {
1393 p = t_hash_next(m, p, &l);
1394 if (!p)
1395 break;
1396 }
1397 return p;
1417} 1398}
1418 1399
1419static int t_hash_show(struct seq_file *m, void *v) 1400static int t_hash_show(struct seq_file *m, void *v)
1420{ 1401{
1421 struct ftrace_func_probe *rec; 1402 struct ftrace_func_probe *rec;
1422 struct hlist_node *hnd = v; 1403 struct hlist_node *hnd = v;
1423 char str[KSYM_SYMBOL_LEN];
1424 1404
1425 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1405 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
1426 1406
1427 if (rec->ops->print) 1407 if (rec->ops->print)
1428 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1408 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
1429 1409
1430 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1410 seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func);
1431 seq_printf(m, "%s:", str);
1432
1433 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
1434 seq_printf(m, "%s", str);
1435 1411
1436 if (rec->data) 1412 if (rec->data)
1437 seq_printf(m, ":%p", rec->data); 1413 seq_printf(m, ":%p", rec->data);
@@ -1460,8 +1436,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1460 iter->pg = iter->pg->next; 1436 iter->pg = iter->pg->next;
1461 iter->idx = 0; 1437 iter->idx = 0;
1462 goto retry; 1438 goto retry;
1463 } else {
1464 iter->idx = -1;
1465 } 1439 }
1466 } else { 1440 } else {
1467 rec = &iter->pg->records[iter->idx++]; 1441 rec = &iter->pg->records[iter->idx++];
@@ -1490,6 +1464,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1490{ 1464{
1491 struct ftrace_iterator *iter = m->private; 1465 struct ftrace_iterator *iter = m->private;
1492 void *p = NULL; 1466 void *p = NULL;
1467 loff_t l;
1493 1468
1494 mutex_lock(&ftrace_lock); 1469 mutex_lock(&ftrace_lock);
1495 /* 1470 /*
@@ -1501,23 +1476,21 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1501 if (*pos > 0) 1476 if (*pos > 0)
1502 return t_hash_start(m, pos); 1477 return t_hash_start(m, pos);
1503 iter->flags |= FTRACE_ITER_PRINTALL; 1478 iter->flags |= FTRACE_ITER_PRINTALL;
1504 (*pos)++;
1505 return iter; 1479 return iter;
1506 } 1480 }
1507 1481
1508 if (iter->flags & FTRACE_ITER_HASH) 1482 if (iter->flags & FTRACE_ITER_HASH)
1509 return t_hash_start(m, pos); 1483 return t_hash_start(m, pos);
1510 1484
1511 if (*pos > 0) { 1485 iter->pg = ftrace_pages_start;
1512 if (iter->idx < 0) 1486 iter->idx = 0;
1513 return p; 1487 for (l = 0; l <= *pos; ) {
1514 (*pos)--; 1488 p = t_next(m, p, &l);
1515 iter->idx--; 1489 if (!p)
1490 break;
1516 } 1491 }
1517 1492
1518 p = t_next(m, p, pos); 1493 if (!p && iter->flags & FTRACE_ITER_FILTER)
1519
1520 if (!p)
1521 return t_hash_start(m, pos); 1494 return t_hash_start(m, pos);
1522 1495
1523 return p; 1496 return p;
@@ -1532,7 +1505,6 @@ static int t_show(struct seq_file *m, void *v)
1532{ 1505{
1533 struct ftrace_iterator *iter = m->private; 1506 struct ftrace_iterator *iter = m->private;
1534 struct dyn_ftrace *rec = v; 1507 struct dyn_ftrace *rec = v;
1535 char str[KSYM_SYMBOL_LEN];
1536 1508
1537 if (iter->flags & FTRACE_ITER_HASH) 1509 if (iter->flags & FTRACE_ITER_HASH)
1538 return t_hash_show(m, v); 1510 return t_hash_show(m, v);
@@ -1545,9 +1517,7 @@ static int t_show(struct seq_file *m, void *v)
1545 if (!rec) 1517 if (!rec)
1546 return 0; 1518 return 0;
1547 1519
1548 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1520 seq_printf(m, "%pf\n", (void *)rec->ip);
1549
1550 seq_printf(m, "%s\n", str);
1551 1521
1552 return 0; 1522 return 0;
1553} 1523}
@@ -1586,17 +1556,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
1586 return ret; 1556 return ret;
1587} 1557}
1588 1558
1589int ftrace_avail_release(struct inode *inode, struct file *file)
1590{
1591 struct seq_file *m = (struct seq_file *)file->private_data;
1592 struct ftrace_iterator *iter = m->private;
1593
1594 seq_release(inode, file);
1595 kfree(iter);
1596
1597 return 0;
1598}
1599
1600static int 1559static int
1601ftrace_failures_open(struct inode *inode, struct file *file) 1560ftrace_failures_open(struct inode *inode, struct file *file)
1602{ 1561{
@@ -1647,7 +1606,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1647 1606
1648 mutex_lock(&ftrace_regex_lock); 1607 mutex_lock(&ftrace_regex_lock);
1649 if ((file->f_mode & FMODE_WRITE) && 1608 if ((file->f_mode & FMODE_WRITE) &&
1650 !(file->f_flags & O_APPEND)) 1609 (file->f_flags & O_TRUNC))
1651 ftrace_filter_reset(enable); 1610 ftrace_filter_reset(enable);
1652 1611
1653 if (file->f_mode & FMODE_READ) { 1612 if (file->f_mode & FMODE_READ) {
@@ -2263,7 +2222,11 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2263 read++; 2222 read++;
2264 cnt--; 2223 cnt--;
2265 2224
2266 if (!(iter->flags & ~FTRACE_ITER_CONT)) { 2225 /*
2226 * If the parser haven't finished with the last write,
2227 * continue reading the user input without skipping spaces.
2228 */
2229 if (!(iter->flags & FTRACE_ITER_CONT)) {
2267 /* skip white space */ 2230 /* skip white space */
2268 while (cnt && isspace(ch)) { 2231 while (cnt && isspace(ch)) {
2269 ret = get_user(ch, ubuf++); 2232 ret = get_user(ch, ubuf++);
@@ -2273,8 +2236,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2273 cnt--; 2236 cnt--;
2274 } 2237 }
2275 2238
2239 /* only spaces were written */
2276 if (isspace(ch)) { 2240 if (isspace(ch)) {
2277 file->f_pos += read; 2241 *ppos += read;
2278 ret = read; 2242 ret = read;
2279 goto out; 2243 goto out;
2280 } 2244 }
@@ -2297,19 +2261,18 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2297 } 2261 }
2298 2262
2299 if (isspace(ch)) { 2263 if (isspace(ch)) {
2300 iter->filtered++;
2301 iter->buffer[iter->buffer_idx] = 0; 2264 iter->buffer[iter->buffer_idx] = 0;
2302 ret = ftrace_process_regex(iter->buffer, 2265 ret = ftrace_process_regex(iter->buffer,
2303 iter->buffer_idx, enable); 2266 iter->buffer_idx, enable);
2304 if (ret) 2267 if (ret)
2305 goto out; 2268 goto out;
2306 iter->buffer_idx = 0; 2269 iter->buffer_idx = 0;
2307 } else 2270 } else {
2308 iter->flags |= FTRACE_ITER_CONT; 2271 iter->flags |= FTRACE_ITER_CONT;
2272 iter->buffer[iter->buffer_idx++] = ch;
2273 }
2309 2274
2310 2275 *ppos += read;
2311 file->f_pos += read;
2312
2313 ret = read; 2276 ret = read;
2314 out: 2277 out:
2315 mutex_unlock(&ftrace_regex_lock); 2278 mutex_unlock(&ftrace_regex_lock);
@@ -2428,7 +2391,6 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2428 iter = file->private_data; 2391 iter = file->private_data;
2429 2392
2430 if (iter->buffer_idx) { 2393 if (iter->buffer_idx) {
2431 iter->filtered++;
2432 iter->buffer[iter->buffer_idx] = 0; 2394 iter->buffer[iter->buffer_idx] = 0;
2433 ftrace_match_records(iter->buffer, iter->buffer_idx, enable); 2395 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
2434 } 2396 }
@@ -2459,14 +2421,14 @@ static const struct file_operations ftrace_avail_fops = {
2459 .open = ftrace_avail_open, 2421 .open = ftrace_avail_open,
2460 .read = seq_read, 2422 .read = seq_read,
2461 .llseek = seq_lseek, 2423 .llseek = seq_lseek,
2462 .release = ftrace_avail_release, 2424 .release = seq_release_private,
2463}; 2425};
2464 2426
2465static const struct file_operations ftrace_failures_fops = { 2427static const struct file_operations ftrace_failures_fops = {
2466 .open = ftrace_failures_open, 2428 .open = ftrace_failures_open,
2467 .read = seq_read, 2429 .read = seq_read,
2468 .llseek = seq_lseek, 2430 .llseek = seq_lseek,
2469 .release = ftrace_avail_release, 2431 .release = seq_release_private,
2470}; 2432};
2471 2433
2472static const struct file_operations ftrace_filter_fops = { 2434static const struct file_operations ftrace_filter_fops = {
@@ -2493,32 +2455,31 @@ int ftrace_graph_count;
2493unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2455unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2494 2456
2495static void * 2457static void *
2496g_next(struct seq_file *m, void *v, loff_t *pos) 2458__g_next(struct seq_file *m, loff_t *pos)
2497{ 2459{
2498 unsigned long *array = m->private; 2460 unsigned long *array = m->private;
2499 int index = *pos;
2500
2501 (*pos)++;
2502 2461
2503 if (index >= ftrace_graph_count) 2462 if (*pos >= ftrace_graph_count)
2504 return NULL; 2463 return NULL;
2464 return &array[*pos];
2465}
2505 2466
2506 return &array[index]; 2467static void *
2468g_next(struct seq_file *m, void *v, loff_t *pos)
2469{
2470 (*pos)++;
2471 return __g_next(m, pos);
2507} 2472}
2508 2473
2509static void *g_start(struct seq_file *m, loff_t *pos) 2474static void *g_start(struct seq_file *m, loff_t *pos)
2510{ 2475{
2511 void *p = NULL;
2512
2513 mutex_lock(&graph_lock); 2476 mutex_lock(&graph_lock);
2514 2477
2515 /* Nothing, tell g_show to print all functions are enabled */ 2478 /* Nothing, tell g_show to print all functions are enabled */
2516 if (!ftrace_graph_count && !*pos) 2479 if (!ftrace_graph_count && !*pos)
2517 return (void *)1; 2480 return (void *)1;
2518 2481
2519 p = g_next(m, p, pos); 2482 return __g_next(m, pos);
2520
2521 return p;
2522} 2483}
2523 2484
2524static void g_stop(struct seq_file *m, void *p) 2485static void g_stop(struct seq_file *m, void *p)
@@ -2529,7 +2490,6 @@ static void g_stop(struct seq_file *m, void *p)
2529static int g_show(struct seq_file *m, void *v) 2490static int g_show(struct seq_file *m, void *v)
2530{ 2491{
2531 unsigned long *ptr = v; 2492 unsigned long *ptr = v;
2532 char str[KSYM_SYMBOL_LEN];
2533 2493
2534 if (!ptr) 2494 if (!ptr)
2535 return 0; 2495 return 0;
@@ -2539,9 +2499,7 @@ static int g_show(struct seq_file *m, void *v)
2539 return 0; 2499 return 0;
2540 } 2500 }
2541 2501
2542 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 2502 seq_printf(m, "%pf\n", v);
2543
2544 seq_printf(m, "%s\n", str);
2545 2503
2546 return 0; 2504 return 0;
2547} 2505}
@@ -2563,7 +2521,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2563 2521
2564 mutex_lock(&graph_lock); 2522 mutex_lock(&graph_lock);
2565 if ((file->f_mode & FMODE_WRITE) && 2523 if ((file->f_mode & FMODE_WRITE) &&
2566 !(file->f_flags & O_APPEND)) { 2524 (file->f_flags & O_TRUNC)) {
2567 ftrace_graph_count = 0; 2525 ftrace_graph_count = 0;
2568 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2526 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2569 } 2527 }
@@ -2582,6 +2540,14 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2582} 2540}
2583 2541
2584static int 2542static int
2543ftrace_graph_release(struct inode *inode, struct file *file)
2544{
2545 if (file->f_mode & FMODE_READ)
2546 seq_release(inode, file);
2547 return 0;
2548}
2549
2550static int
2585ftrace_set_func(unsigned long *array, int *idx, char *buffer) 2551ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2586{ 2552{
2587 struct dyn_ftrace *rec; 2553 struct dyn_ftrace *rec;
@@ -2710,9 +2676,10 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2710} 2676}
2711 2677
2712static const struct file_operations ftrace_graph_fops = { 2678static const struct file_operations ftrace_graph_fops = {
2713 .open = ftrace_graph_open, 2679 .open = ftrace_graph_open,
2714 .read = seq_read, 2680 .read = seq_read,
2715 .write = ftrace_graph_write, 2681 .write = ftrace_graph_write,
2682 .release = ftrace_graph_release,
2716}; 2683};
2717#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2684#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2718 2685
@@ -3145,10 +3112,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3145 3112
3146 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3113 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
3147 3114
3148 if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) 3115 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3149 goto out; 3116 goto out;
3150 3117
3151 last_ftrace_enabled = ftrace_enabled; 3118 last_ftrace_enabled = !!ftrace_enabled;
3152 3119
3153 if (ftrace_enabled) { 3120 if (ftrace_enabled) {
3154 3121
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 86cdf671d7e2..81b1645c8549 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -183,11 +183,9 @@ static void kmemtrace_stop_probes(void)
183 183
184static int kmem_trace_init(struct trace_array *tr) 184static int kmem_trace_init(struct trace_array *tr)
185{ 185{
186 int cpu;
187 kmemtrace_array = tr; 186 kmemtrace_array = tr;
188 187
189 for_each_cpu_mask(cpu, cpu_possible_map) 188 tracing_reset_online_cpus(tr);
190 tracing_reset(tr, cpu);
191 189
192 kmemtrace_start_probes(); 190 kmemtrace_start_probes();
193 191
@@ -239,12 +237,52 @@ struct kmemtrace_user_event_alloc {
239}; 237};
240 238
241static enum print_line_t 239static enum print_line_t
242kmemtrace_print_alloc_user(struct trace_iterator *iter, 240kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
243 struct kmemtrace_alloc_entry *entry)
244{ 241{
245 struct kmemtrace_user_event_alloc *ev_alloc;
246 struct trace_seq *s = &iter->seq; 242 struct trace_seq *s = &iter->seq;
243 struct kmemtrace_alloc_entry *entry;
244 int ret;
245
246 trace_assign_type(entry, iter->ent);
247
248 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
249 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
250 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
251 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
252 (unsigned long)entry->gfp_flags, entry->node);
253
254 if (!ret)
255 return TRACE_TYPE_PARTIAL_LINE;
256 return TRACE_TYPE_HANDLED;
257}
258
259static enum print_line_t
260kmemtrace_print_free(struct trace_iterator *iter, int flags)
261{
262 struct trace_seq *s = &iter->seq;
263 struct kmemtrace_free_entry *entry;
264 int ret;
265
266 trace_assign_type(entry, iter->ent);
267
268 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
269 entry->type_id, (void *)entry->call_site,
270 (unsigned long)entry->ptr);
271
272 if (!ret)
273 return TRACE_TYPE_PARTIAL_LINE;
274 return TRACE_TYPE_HANDLED;
275}
276
277static enum print_line_t
278kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
279{
280 struct trace_seq *s = &iter->seq;
281 struct kmemtrace_alloc_entry *entry;
247 struct kmemtrace_user_event *ev; 282 struct kmemtrace_user_event *ev;
283 struct kmemtrace_user_event_alloc *ev_alloc;
284
285 trace_assign_type(entry, iter->ent);
248 286
249 ev = trace_seq_reserve(s, sizeof(*ev)); 287 ev = trace_seq_reserve(s, sizeof(*ev));
250 if (!ev) 288 if (!ev)
@@ -271,12 +309,14 @@ kmemtrace_print_alloc_user(struct trace_iterator *iter,
271} 309}
272 310
273static enum print_line_t 311static enum print_line_t
274kmemtrace_print_free_user(struct trace_iterator *iter, 312kmemtrace_print_free_user(struct trace_iterator *iter, int flags)
275 struct kmemtrace_free_entry *entry)
276{ 313{
277 struct trace_seq *s = &iter->seq; 314 struct trace_seq *s = &iter->seq;
315 struct kmemtrace_free_entry *entry;
278 struct kmemtrace_user_event *ev; 316 struct kmemtrace_user_event *ev;
279 317
318 trace_assign_type(entry, iter->ent);
319
280 ev = trace_seq_reserve(s, sizeof(*ev)); 320 ev = trace_seq_reserve(s, sizeof(*ev));
281 if (!ev) 321 if (!ev)
282 return TRACE_TYPE_PARTIAL_LINE; 322 return TRACE_TYPE_PARTIAL_LINE;
@@ -294,12 +334,14 @@ kmemtrace_print_free_user(struct trace_iterator *iter,
294 334
295/* The two other following provide a more minimalistic output */ 335/* The two other following provide a more minimalistic output */
296static enum print_line_t 336static enum print_line_t
297kmemtrace_print_alloc_compress(struct trace_iterator *iter, 337kmemtrace_print_alloc_compress(struct trace_iterator *iter)
298 struct kmemtrace_alloc_entry *entry)
299{ 338{
339 struct kmemtrace_alloc_entry *entry;
300 struct trace_seq *s = &iter->seq; 340 struct trace_seq *s = &iter->seq;
301 int ret; 341 int ret;
302 342
343 trace_assign_type(entry, iter->ent);
344
303 /* Alloc entry */ 345 /* Alloc entry */
304 ret = trace_seq_printf(s, " + "); 346 ret = trace_seq_printf(s, " + ");
305 if (!ret) 347 if (!ret)
@@ -345,29 +387,24 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
345 if (!ret) 387 if (!ret)
346 return TRACE_TYPE_PARTIAL_LINE; 388 return TRACE_TYPE_PARTIAL_LINE;
347 389
348 /* Node */ 390 /* Node and call site*/
349 ret = trace_seq_printf(s, "%4d ", entry->node); 391 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
350 if (!ret) 392 (void *)entry->call_site);
351 return TRACE_TYPE_PARTIAL_LINE;
352
353 /* Call site */
354 ret = seq_print_ip_sym(s, entry->call_site, 0);
355 if (!ret) 393 if (!ret)
356 return TRACE_TYPE_PARTIAL_LINE; 394 return TRACE_TYPE_PARTIAL_LINE;
357 395
358 if (!trace_seq_printf(s, "\n"))
359 return TRACE_TYPE_PARTIAL_LINE;
360
361 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
362} 397}
363 398
364static enum print_line_t 399static enum print_line_t
365kmemtrace_print_free_compress(struct trace_iterator *iter, 400kmemtrace_print_free_compress(struct trace_iterator *iter)
366 struct kmemtrace_free_entry *entry)
367{ 401{
402 struct kmemtrace_free_entry *entry;
368 struct trace_seq *s = &iter->seq; 403 struct trace_seq *s = &iter->seq;
369 int ret; 404 int ret;
370 405
406 trace_assign_type(entry, iter->ent);
407
371 /* Free entry */ 408 /* Free entry */
372 ret = trace_seq_printf(s, " - "); 409 ret = trace_seq_printf(s, " - ");
373 if (!ret) 410 if (!ret)
@@ -401,19 +438,11 @@ kmemtrace_print_free_compress(struct trace_iterator *iter,
401 if (!ret) 438 if (!ret)
402 return TRACE_TYPE_PARTIAL_LINE; 439 return TRACE_TYPE_PARTIAL_LINE;
403 440
404 /* Skip node */ 441 /* Skip node and print call site*/
405 ret = trace_seq_printf(s, " "); 442 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
406 if (!ret) 443 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE; 444 return TRACE_TYPE_PARTIAL_LINE;
408 445
409 /* Call site */
410 ret = seq_print_ip_sym(s, entry->call_site, 0);
411 if (!ret)
412 return TRACE_TYPE_PARTIAL_LINE;
413
414 if (!trace_seq_printf(s, "\n"))
415 return TRACE_TYPE_PARTIAL_LINE;
416
417 return TRACE_TYPE_HANDLED; 446 return TRACE_TYPE_HANDLED;
418} 447}
419 448
@@ -421,32 +450,31 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
421{ 450{
422 struct trace_entry *entry = iter->ent; 451 struct trace_entry *entry = iter->ent;
423 452
424 switch (entry->type) { 453 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
425 case TRACE_KMEM_ALLOC: { 454 return TRACE_TYPE_UNHANDLED;
426 struct kmemtrace_alloc_entry *field;
427
428 trace_assign_type(field, entry);
429 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
430 return kmemtrace_print_alloc_compress(iter, field);
431 else
432 return kmemtrace_print_alloc_user(iter, field);
433 }
434
435 case TRACE_KMEM_FREE: {
436 struct kmemtrace_free_entry *field;
437
438 trace_assign_type(field, entry);
439 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
440 return kmemtrace_print_free_compress(iter, field);
441 else
442 return kmemtrace_print_free_user(iter, field);
443 }
444 455
456 switch (entry->type) {
457 case TRACE_KMEM_ALLOC:
458 return kmemtrace_print_alloc_compress(iter);
459 case TRACE_KMEM_FREE:
460 return kmemtrace_print_free_compress(iter);
445 default: 461 default:
446 return TRACE_TYPE_UNHANDLED; 462 return TRACE_TYPE_UNHANDLED;
447 } 463 }
448} 464}
449 465
466static struct trace_event kmem_trace_alloc = {
467 .type = TRACE_KMEM_ALLOC,
468 .trace = kmemtrace_print_alloc,
469 .binary = kmemtrace_print_alloc_user,
470};
471
472static struct trace_event kmem_trace_free = {
473 .type = TRACE_KMEM_FREE,
474 .trace = kmemtrace_print_free,
475 .binary = kmemtrace_print_free_user,
476};
477
450static struct tracer kmem_tracer __read_mostly = { 478static struct tracer kmem_tracer __read_mostly = {
451 .name = "kmemtrace", 479 .name = "kmemtrace",
452 .init = kmem_trace_init, 480 .init = kmem_trace_init,
@@ -463,6 +491,21 @@ void kmemtrace_init(void)
463 491
464static int __init init_kmem_tracer(void) 492static int __init init_kmem_tracer(void)
465{ 493{
466 return register_tracer(&kmem_tracer); 494 if (!register_ftrace_event(&kmem_trace_alloc)) {
495 pr_warning("Warning: could not register kmem events\n");
496 return 1;
497 }
498
499 if (!register_ftrace_event(&kmem_trace_free)) {
500 pr_warning("Warning: could not register kmem events\n");
501 return 1;
502 }
503
504 if (!register_tracer(&kmem_tracer)) {
505 pr_warning("Warning: could not register the kmem tracer\n");
506 return 1;
507 }
508
509 return 0;
467} 510}
468device_initcall(init_kmem_tracer); 511device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..454e74e718cf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
207#define RB_ALIGNMENT 4U 207#define RB_ALIGNMENT 4U
208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
209 210
210/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 211/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
211#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 212#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -217,17 +218,12 @@ enum {
217 218
218static inline int rb_null_event(struct ring_buffer_event *event) 219static inline int rb_null_event(struct ring_buffer_event *event)
219{ 220{
220 return event->type_len == RINGBUF_TYPE_PADDING 221 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
221 && event->time_delta == 0;
222}
223
224static inline int rb_discarded_event(struct ring_buffer_event *event)
225{
226 return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
227} 222}
228 223
229static void rb_event_set_padding(struct ring_buffer_event *event) 224static void rb_event_set_padding(struct ring_buffer_event *event)
230{ 225{
226 /* padding has a NULL time_delta */
231 event->type_len = RINGBUF_TYPE_PADDING; 227 event->type_len = RINGBUF_TYPE_PADDING;
232 event->time_delta = 0; 228 event->time_delta = 0;
233} 229}
@@ -321,6 +317,14 @@ struct buffer_data_page {
321 unsigned char data[]; /* data of buffer page */ 317 unsigned char data[]; /* data of buffer page */
322}; 318};
323 319
320/*
321 * Note, the buffer_page list must be first. The buffer pages
322 * are allocated in cache lines, which means that each buffer
323 * page will be at the beginning of a cache line, and thus
324 * the least significant bits will be zero. We use this to
325 * add flags in the list struct pointers, to make the ring buffer
326 * lockless.
327 */
324struct buffer_page { 328struct buffer_page {
325 struct list_head list; /* list of buffer pages */ 329 struct list_head list; /* list of buffer pages */
326 local_t write; /* index for next write */ 330 local_t write; /* index for next write */
@@ -329,6 +333,21 @@ struct buffer_page {
329 struct buffer_data_page *page; /* Actual data page */ 333 struct buffer_data_page *page; /* Actual data page */
330}; 334};
331 335
336/*
337 * The buffer page counters, write and entries, must be reset
338 * atomically when crossing page boundaries. To synchronize this
339 * update, two counters are inserted into the number. One is
340 * the actual counter for the write position or count on the page.
341 *
342 * The other is a counter of updaters. Before an update happens
343 * the update partition of the counter is incremented. This will
344 * allow the updater to update the counter atomically.
345 *
346 * The counter is 20 bits, and the state data is 12.
347 */
348#define RB_WRITE_MASK 0xfffff
349#define RB_WRITE_INTCNT (1 << 20)
350
332static void rb_init_page(struct buffer_data_page *bpage) 351static void rb_init_page(struct buffer_data_page *bpage)
333{ 352{
334 local_set(&bpage->commit, 0); 353 local_set(&bpage->commit, 0);
@@ -402,19 +421,20 @@ int ring_buffer_print_page_header(struct trace_seq *s)
402struct ring_buffer_per_cpu { 421struct ring_buffer_per_cpu {
403 int cpu; 422 int cpu;
404 struct ring_buffer *buffer; 423 struct ring_buffer *buffer;
405 spinlock_t reader_lock; /* serialize readers */ 424 spinlock_t reader_lock; /* serialize readers */
406 raw_spinlock_t lock; 425 raw_spinlock_t lock;
407 struct lock_class_key lock_key; 426 struct lock_class_key lock_key;
408 struct list_head pages; 427 struct list_head *pages;
409 struct buffer_page *head_page; /* read from head */ 428 struct buffer_page *head_page; /* read from head */
410 struct buffer_page *tail_page; /* write to tail */ 429 struct buffer_page *tail_page; /* write to tail */
411 struct buffer_page *commit_page; /* committed pages */ 430 struct buffer_page *commit_page; /* committed pages */
412 struct buffer_page *reader_page; 431 struct buffer_page *reader_page;
413 unsigned long nmi_dropped; 432 local_t commit_overrun;
414 unsigned long commit_overrun; 433 local_t overrun;
415 unsigned long overrun;
416 unsigned long read;
417 local_t entries; 434 local_t entries;
435 local_t committing;
436 local_t commits;
437 unsigned long read;
418 u64 write_stamp; 438 u64 write_stamp;
419 u64 read_stamp; 439 u64 read_stamp;
420 atomic_t record_disabled; 440 atomic_t record_disabled;
@@ -447,14 +467,19 @@ struct ring_buffer_iter {
447}; 467};
448 468
449/* buffer may be either ring_buffer or ring_buffer_per_cpu */ 469/* buffer may be either ring_buffer or ring_buffer_per_cpu */
450#define RB_WARN_ON(buffer, cond) \ 470#define RB_WARN_ON(b, cond) \
451 ({ \ 471 ({ \
452 int _____ret = unlikely(cond); \ 472 int _____ret = unlikely(cond); \
453 if (_____ret) { \ 473 if (_____ret) { \
454 atomic_inc(&buffer->record_disabled); \ 474 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
455 WARN_ON(1); \ 475 struct ring_buffer_per_cpu *__b = \
456 } \ 476 (void *)b; \
457 _____ret; \ 477 atomic_inc(&__b->buffer->record_disabled); \
478 } else \
479 atomic_inc(&b->record_disabled); \
480 WARN_ON(1); \
481 } \
482 _____ret; \
458 }) 483 })
459 484
460/* Up this if you want to test the TIME_EXTENTS and normalization */ 485/* Up this if you want to test the TIME_EXTENTS and normalization */
@@ -486,6 +511,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
486} 511}
487EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 512EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
488 513
514/*
515 * Making the ring buffer lockless makes things tricky.
516 * Although writes only happen on the CPU that they are on,
517 * and they only need to worry about interrupts. Reads can
518 * happen on any CPU.
519 *
520 * The reader page is always off the ring buffer, but when the
521 * reader finishes with a page, it needs to swap its page with
522 * a new one from the buffer. The reader needs to take from
523 * the head (writes go to the tail). But if a writer is in overwrite
524 * mode and wraps, it must push the head page forward.
525 *
526 * Here lies the problem.
527 *
528 * The reader must be careful to replace only the head page, and
529 * not another one. As described at the top of the file in the
530 * ASCII art, the reader sets its old page to point to the next
531 * page after head. It then sets the page after head to point to
532 * the old reader page. But if the writer moves the head page
533 * during this operation, the reader could end up with the tail.
534 *
535 * We use cmpxchg to help prevent this race. We also do something
536 * special with the page before head. We set the LSB to 1.
537 *
538 * When the writer must push the page forward, it will clear the
539 * bit that points to the head page, move the head, and then set
540 * the bit that points to the new head page.
541 *
542 * We also don't want an interrupt coming in and moving the head
543 * page on another writer. Thus we use the second LSB to catch
544 * that too. Thus:
545 *
546 * head->list->prev->next bit 1 bit 0
547 * ------- -------
548 * Normal page 0 0
549 * Points to head page 0 1
550 * New head page 1 0
551 *
552 * Note we can not trust the prev pointer of the head page, because:
553 *
554 * +----+ +-----+ +-----+
555 * | |------>| T |---X--->| N |
556 * | |<------| | | |
557 * +----+ +-----+ +-----+
558 * ^ ^ |
559 * | +-----+ | |
560 * +----------| R |----------+ |
561 * | |<-----------+
562 * +-----+
563 *
564 * Key: ---X--> HEAD flag set in pointer
565 * T Tail page
566 * R Reader page
567 * N Next page
568 *
569 * (see __rb_reserve_next() to see where this happens)
570 *
571 * What the above shows is that the reader just swapped out
572 * the reader page with a page in the buffer, but before it
573 * could make the new header point back to the new page added
574 * it was preempted by a writer. The writer moved forward onto
575 * the new page added by the reader and is about to move forward
576 * again.
577 *
578 * You can see, it is legitimate for the previous pointer of
579 * the head (or any page) not to point back to itself. But only
580 * temporarily.
581 */
582
583#define RB_PAGE_NORMAL 0UL
584#define RB_PAGE_HEAD 1UL
585#define RB_PAGE_UPDATE 2UL
586
587
588#define RB_FLAG_MASK 3UL
589
590/* PAGE_MOVED is not part of the mask */
591#define RB_PAGE_MOVED 4UL
592
593/*
594 * rb_list_head - remove any bit
595 */
596static struct list_head *rb_list_head(struct list_head *list)
597{
598 unsigned long val = (unsigned long)list;
599
600 return (struct list_head *)(val & ~RB_FLAG_MASK);
601}
602
603/*
604 * rb_is_head_page - test if the give page is the head page
605 *
606 * Because the reader may move the head_page pointer, we can
607 * not trust what the head page is (it may be pointing to
608 * the reader page). But if the next page is a header page,
609 * its flags will be non zero.
610 */
611static int inline
612rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
613 struct buffer_page *page, struct list_head *list)
614{
615 unsigned long val;
616
617 val = (unsigned long)list->next;
618
619 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
620 return RB_PAGE_MOVED;
621
622 return val & RB_FLAG_MASK;
623}
624
625/*
626 * rb_is_reader_page
627 *
628 * The unique thing about the reader page, is that, if the
629 * writer is ever on it, the previous pointer never points
630 * back to the reader page.
631 */
632static int rb_is_reader_page(struct buffer_page *page)
633{
634 struct list_head *list = page->list.prev;
635
636 return rb_list_head(list->next) != &page->list;
637}
638
639/*
640 * rb_set_list_to_head - set a list_head to be pointing to head.
641 */
642static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
643 struct list_head *list)
644{
645 unsigned long *ptr;
646
647 ptr = (unsigned long *)&list->next;
648 *ptr |= RB_PAGE_HEAD;
649 *ptr &= ~RB_PAGE_UPDATE;
650}
651
652/*
653 * rb_head_page_activate - sets up head page
654 */
655static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
656{
657 struct buffer_page *head;
658
659 head = cpu_buffer->head_page;
660 if (!head)
661 return;
662
663 /*
664 * Set the previous list pointer to have the HEAD flag.
665 */
666 rb_set_list_to_head(cpu_buffer, head->list.prev);
667}
668
669static void rb_list_head_clear(struct list_head *list)
670{
671 unsigned long *ptr = (unsigned long *)&list->next;
672
673 *ptr &= ~RB_FLAG_MASK;
674}
675
676/*
677 * rb_head_page_dactivate - clears head page ptr (for free list)
678 */
679static void
680rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
681{
682 struct list_head *hd;
683
684 /* Go through the whole list and clear any pointers found. */
685 rb_list_head_clear(cpu_buffer->pages);
686
687 list_for_each(hd, cpu_buffer->pages)
688 rb_list_head_clear(hd);
689}
690
691static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
692 struct buffer_page *head,
693 struct buffer_page *prev,
694 int old_flag, int new_flag)
695{
696 struct list_head *list;
697 unsigned long val = (unsigned long)&head->list;
698 unsigned long ret;
699
700 list = &prev->list;
701
702 val &= ~RB_FLAG_MASK;
703
704 ret = (unsigned long)cmpxchg(&list->next,
705 val | old_flag, val | new_flag);
706
707 /* check if the reader took the page */
708 if ((ret & ~RB_FLAG_MASK) != val)
709 return RB_PAGE_MOVED;
710
711 return ret & RB_FLAG_MASK;
712}
713
714static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
715 struct buffer_page *head,
716 struct buffer_page *prev,
717 int old_flag)
718{
719 return rb_head_page_set(cpu_buffer, head, prev,
720 old_flag, RB_PAGE_UPDATE);
721}
722
723static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
724 struct buffer_page *head,
725 struct buffer_page *prev,
726 int old_flag)
727{
728 return rb_head_page_set(cpu_buffer, head, prev,
729 old_flag, RB_PAGE_HEAD);
730}
731
732static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
733 struct buffer_page *head,
734 struct buffer_page *prev,
735 int old_flag)
736{
737 return rb_head_page_set(cpu_buffer, head, prev,
738 old_flag, RB_PAGE_NORMAL);
739}
740
741static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
742 struct buffer_page **bpage)
743{
744 struct list_head *p = rb_list_head((*bpage)->list.next);
745
746 *bpage = list_entry(p, struct buffer_page, list);
747}
748
749static struct buffer_page *
750rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
751{
752 struct buffer_page *head;
753 struct buffer_page *page;
754 struct list_head *list;
755 int i;
756
757 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
758 return NULL;
759
760 /* sanity check */
761 list = cpu_buffer->pages;
762 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
763 return NULL;
764
765 page = head = cpu_buffer->head_page;
766 /*
767 * It is possible that the writer moves the header behind
768 * where we started, and we miss in one loop.
769 * A second loop should grab the header, but we'll do
770 * three loops just because I'm paranoid.
771 */
772 for (i = 0; i < 3; i++) {
773 do {
774 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
775 cpu_buffer->head_page = page;
776 return page;
777 }
778 rb_inc_page(cpu_buffer, &page);
779 } while (page != head);
780 }
781
782 RB_WARN_ON(cpu_buffer, 1);
783
784 return NULL;
785}
786
787static int rb_head_page_replace(struct buffer_page *old,
788 struct buffer_page *new)
789{
790 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
791 unsigned long val;
792 unsigned long ret;
793
794 val = *ptr & ~RB_FLAG_MASK;
795 val |= RB_PAGE_HEAD;
796
797 ret = cmpxchg(ptr, val, &new->list);
798
799 return ret == val;
800}
801
802/*
803 * rb_tail_page_update - move the tail page forward
804 *
805 * Returns 1 if moved tail page, 0 if someone else did.
806 */
807static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
808 struct buffer_page *tail_page,
809 struct buffer_page *next_page)
810{
811 struct buffer_page *old_tail;
812 unsigned long old_entries;
813 unsigned long old_write;
814 int ret = 0;
815
816 /*
817 * The tail page now needs to be moved forward.
818 *
819 * We need to reset the tail page, but without messing
820 * with possible erasing of data brought in by interrupts
821 * that have moved the tail page and are currently on it.
822 *
823 * We add a counter to the write field to denote this.
824 */
825 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
826 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
827
828 /*
829 * Just make sure we have seen our old_write and synchronize
830 * with any interrupts that come in.
831 */
832 barrier();
833
834 /*
835 * If the tail page is still the same as what we think
836 * it is, then it is up to us to update the tail
837 * pointer.
838 */
839 if (tail_page == cpu_buffer->tail_page) {
840 /* Zero the write counter */
841 unsigned long val = old_write & ~RB_WRITE_MASK;
842 unsigned long eval = old_entries & ~RB_WRITE_MASK;
843
844 /*
845 * This will only succeed if an interrupt did
846 * not come in and change it. In which case, we
847 * do not want to modify it.
848 *
849 * We add (void) to let the compiler know that we do not care
850 * about the return value of these functions. We use the
851 * cmpxchg to only update if an interrupt did not already
852 * do it for us. If the cmpxchg fails, we don't care.
853 */
854 (void)local_cmpxchg(&next_page->write, old_write, val);
855 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
856
857 /*
858 * No need to worry about races with clearing out the commit.
859 * it only can increment when a commit takes place. But that
860 * only happens in the outer most nested commit.
861 */
862 local_set(&next_page->page->commit, 0);
863
864 old_tail = cmpxchg(&cpu_buffer->tail_page,
865 tail_page, next_page);
866
867 if (old_tail == tail_page)
868 ret = 1;
869 }
870
871 return ret;
872}
873
874static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
875 struct buffer_page *bpage)
876{
877 unsigned long val = (unsigned long)bpage;
878
879 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
880 return 1;
881
882 return 0;
883}
884
885/**
886 * rb_check_list - make sure a pointer to a list has the last bits zero
887 */
888static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
889 struct list_head *list)
890{
891 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
892 return 1;
893 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
894 return 1;
895 return 0;
896}
897
489/** 898/**
490 * check_pages - integrity check of buffer pages 899 * check_pages - integrity check of buffer pages
491 * @cpu_buffer: CPU buffer with pages to test 900 * @cpu_buffer: CPU buffer with pages to test
@@ -495,14 +904,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
495 */ 904 */
496static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 905static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
497{ 906{
498 struct list_head *head = &cpu_buffer->pages; 907 struct list_head *head = cpu_buffer->pages;
499 struct buffer_page *bpage, *tmp; 908 struct buffer_page *bpage, *tmp;
500 909
910 rb_head_page_deactivate(cpu_buffer);
911
501 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 912 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
502 return -1; 913 return -1;
503 if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 914 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
504 return -1; 915 return -1;
505 916
917 if (rb_check_list(cpu_buffer, head))
918 return -1;
919
506 list_for_each_entry_safe(bpage, tmp, head, list) { 920 list_for_each_entry_safe(bpage, tmp, head, list) {
507 if (RB_WARN_ON(cpu_buffer, 921 if (RB_WARN_ON(cpu_buffer,
508 bpage->list.next->prev != &bpage->list)) 922 bpage->list.next->prev != &bpage->list))
@@ -510,25 +924,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
510 if (RB_WARN_ON(cpu_buffer, 924 if (RB_WARN_ON(cpu_buffer,
511 bpage->list.prev->next != &bpage->list)) 925 bpage->list.prev->next != &bpage->list))
512 return -1; 926 return -1;
927 if (rb_check_list(cpu_buffer, &bpage->list))
928 return -1;
513 } 929 }
514 930
931 rb_head_page_activate(cpu_buffer);
932
515 return 0; 933 return 0;
516} 934}
517 935
518static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 936static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
519 unsigned nr_pages) 937 unsigned nr_pages)
520{ 938{
521 struct list_head *head = &cpu_buffer->pages;
522 struct buffer_page *bpage, *tmp; 939 struct buffer_page *bpage, *tmp;
523 unsigned long addr; 940 unsigned long addr;
524 LIST_HEAD(pages); 941 LIST_HEAD(pages);
525 unsigned i; 942 unsigned i;
526 943
944 WARN_ON(!nr_pages);
945
527 for (i = 0; i < nr_pages; i++) { 946 for (i = 0; i < nr_pages; i++) {
528 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 947 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
529 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 948 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
530 if (!bpage) 949 if (!bpage)
531 goto free_pages; 950 goto free_pages;
951
952 rb_check_bpage(cpu_buffer, bpage);
953
532 list_add(&bpage->list, &pages); 954 list_add(&bpage->list, &pages);
533 955
534 addr = __get_free_page(GFP_KERNEL); 956 addr = __get_free_page(GFP_KERNEL);
@@ -538,7 +960,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
538 rb_init_page(bpage->page); 960 rb_init_page(bpage->page);
539 } 961 }
540 962
541 list_splice(&pages, head); 963 /*
964 * The ring buffer page list is a circular list that does not
965 * start and end with a list head. All page list items point to
966 * other pages.
967 */
968 cpu_buffer->pages = pages.next;
969 list_del(&pages);
542 970
543 rb_check_pages(cpu_buffer); 971 rb_check_pages(cpu_buffer);
544 972
@@ -570,13 +998,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
570 spin_lock_init(&cpu_buffer->reader_lock); 998 spin_lock_init(&cpu_buffer->reader_lock);
571 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 999 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
572 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1000 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
573 INIT_LIST_HEAD(&cpu_buffer->pages);
574 1001
575 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1002 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
576 GFP_KERNEL, cpu_to_node(cpu)); 1003 GFP_KERNEL, cpu_to_node(cpu));
577 if (!bpage) 1004 if (!bpage)
578 goto fail_free_buffer; 1005 goto fail_free_buffer;
579 1006
1007 rb_check_bpage(cpu_buffer, bpage);
1008
580 cpu_buffer->reader_page = bpage; 1009 cpu_buffer->reader_page = bpage;
581 addr = __get_free_page(GFP_KERNEL); 1010 addr = __get_free_page(GFP_KERNEL);
582 if (!addr) 1011 if (!addr)
@@ -591,9 +1020,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
591 goto fail_free_reader; 1020 goto fail_free_reader;
592 1021
593 cpu_buffer->head_page 1022 cpu_buffer->head_page
594 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 1023 = list_entry(cpu_buffer->pages, struct buffer_page, list);
595 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 1024 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
596 1025
1026 rb_head_page_activate(cpu_buffer);
1027
597 return cpu_buffer; 1028 return cpu_buffer;
598 1029
599 fail_free_reader: 1030 fail_free_reader:
@@ -606,24 +1037,25 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
606 1037
607static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 1038static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
608{ 1039{
609 struct list_head *head = &cpu_buffer->pages; 1040 struct list_head *head = cpu_buffer->pages;
610 struct buffer_page *bpage, *tmp; 1041 struct buffer_page *bpage, *tmp;
611 1042
612 free_buffer_page(cpu_buffer->reader_page); 1043 free_buffer_page(cpu_buffer->reader_page);
613 1044
614 list_for_each_entry_safe(bpage, tmp, head, list) { 1045 rb_head_page_deactivate(cpu_buffer);
615 list_del_init(&bpage->list); 1046
1047 if (head) {
1048 list_for_each_entry_safe(bpage, tmp, head, list) {
1049 list_del_init(&bpage->list);
1050 free_buffer_page(bpage);
1051 }
1052 bpage = list_entry(head, struct buffer_page, list);
616 free_buffer_page(bpage); 1053 free_buffer_page(bpage);
617 } 1054 }
1055
618 kfree(cpu_buffer); 1056 kfree(cpu_buffer);
619} 1057}
620 1058
621/*
622 * Causes compile errors if the struct buffer_page gets bigger
623 * than the struct page.
624 */
625extern int ring_buffer_page_too_big(void);
626
627#ifdef CONFIG_HOTPLUG_CPU 1059#ifdef CONFIG_HOTPLUG_CPU
628static int rb_cpu_notify(struct notifier_block *self, 1060static int rb_cpu_notify(struct notifier_block *self,
629 unsigned long action, void *hcpu); 1061 unsigned long action, void *hcpu);
@@ -646,11 +1078,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
646 int bsize; 1078 int bsize;
647 int cpu; 1079 int cpu;
648 1080
649 /* Paranoid! Optimizes out when all is well */
650 if (sizeof(struct buffer_page) > sizeof(struct page))
651 ring_buffer_page_too_big();
652
653
654 /* keep it in its own cache line */ 1081 /* keep it in its own cache line */
655 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 1082 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
656 GFP_KERNEL); 1083 GFP_KERNEL);
@@ -666,8 +1093,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
666 buffer->reader_lock_key = key; 1093 buffer->reader_lock_key = key;
667 1094
668 /* need at least two pages */ 1095 /* need at least two pages */
669 if (buffer->pages == 1) 1096 if (buffer->pages < 2)
670 buffer->pages++; 1097 buffer->pages = 2;
671 1098
672 /* 1099 /*
673 * In case of non-hotplug cpu, if the ring-buffer is allocated 1100 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -743,6 +1170,7 @@ ring_buffer_free(struct ring_buffer *buffer)
743 1170
744 put_online_cpus(); 1171 put_online_cpus();
745 1172
1173 kfree(buffer->buffers);
746 free_cpumask_var(buffer->cpumask); 1174 free_cpumask_var(buffer->cpumask);
747 1175
748 kfree(buffer); 1176 kfree(buffer);
@@ -767,15 +1195,17 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
767 atomic_inc(&cpu_buffer->record_disabled); 1195 atomic_inc(&cpu_buffer->record_disabled);
768 synchronize_sched(); 1196 synchronize_sched();
769 1197
1198 rb_head_page_deactivate(cpu_buffer);
1199
770 for (i = 0; i < nr_pages; i++) { 1200 for (i = 0; i < nr_pages; i++) {
771 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1201 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
772 return; 1202 return;
773 p = cpu_buffer->pages.next; 1203 p = cpu_buffer->pages->next;
774 bpage = list_entry(p, struct buffer_page, list); 1204 bpage = list_entry(p, struct buffer_page, list);
775 list_del_init(&bpage->list); 1205 list_del_init(&bpage->list);
776 free_buffer_page(bpage); 1206 free_buffer_page(bpage);
777 } 1207 }
778 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1208 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
779 return; 1209 return;
780 1210
781 rb_reset_cpu(cpu_buffer); 1211 rb_reset_cpu(cpu_buffer);
@@ -797,15 +1227,19 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
797 atomic_inc(&cpu_buffer->record_disabled); 1227 atomic_inc(&cpu_buffer->record_disabled);
798 synchronize_sched(); 1228 synchronize_sched();
799 1229
1230 spin_lock_irq(&cpu_buffer->reader_lock);
1231 rb_head_page_deactivate(cpu_buffer);
1232
800 for (i = 0; i < nr_pages; i++) { 1233 for (i = 0; i < nr_pages; i++) {
801 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1234 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
802 return; 1235 return;
803 p = pages->next; 1236 p = pages->next;
804 bpage = list_entry(p, struct buffer_page, list); 1237 bpage = list_entry(p, struct buffer_page, list);
805 list_del_init(&bpage->list); 1238 list_del_init(&bpage->list);
806 list_add_tail(&bpage->list, &cpu_buffer->pages); 1239 list_add_tail(&bpage->list, cpu_buffer->pages);
807 } 1240 }
808 rb_reset_cpu(cpu_buffer); 1241 rb_reset_cpu(cpu_buffer);
1242 spin_unlock_irq(&cpu_buffer->reader_lock);
809 1243
810 rb_check_pages(cpu_buffer); 1244 rb_check_pages(cpu_buffer);
811 1245
@@ -956,21 +1390,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
956} 1390}
957 1391
958static inline struct ring_buffer_event * 1392static inline struct ring_buffer_event *
959rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
960{
961 return __rb_page_index(cpu_buffer->head_page,
962 cpu_buffer->head_page->read);
963}
964
965static inline struct ring_buffer_event *
966rb_iter_head_event(struct ring_buffer_iter *iter) 1393rb_iter_head_event(struct ring_buffer_iter *iter)
967{ 1394{
968 return __rb_page_index(iter->head_page, iter->head); 1395 return __rb_page_index(iter->head_page, iter->head);
969} 1396}
970 1397
971static inline unsigned rb_page_write(struct buffer_page *bpage) 1398static inline unsigned long rb_page_write(struct buffer_page *bpage)
972{ 1399{
973 return local_read(&bpage->write); 1400 return local_read(&bpage->write) & RB_WRITE_MASK;
974} 1401}
975 1402
976static inline unsigned rb_page_commit(struct buffer_page *bpage) 1403static inline unsigned rb_page_commit(struct buffer_page *bpage)
@@ -978,6 +1405,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage)
978 return local_read(&bpage->page->commit); 1405 return local_read(&bpage->page->commit);
979} 1406}
980 1407
1408static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1409{
1410 return local_read(&bpage->entries) & RB_WRITE_MASK;
1411}
1412
981/* Size is determined by what has been committed */ 1413/* Size is determined by what has been committed */
982static inline unsigned rb_page_size(struct buffer_page *bpage) 1414static inline unsigned rb_page_size(struct buffer_page *bpage)
983{ 1415{
@@ -990,33 +1422,17 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
990 return rb_page_commit(cpu_buffer->commit_page); 1422 return rb_page_commit(cpu_buffer->commit_page);
991} 1423}
992 1424
993static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
994{
995 return rb_page_commit(cpu_buffer->head_page);
996}
997
998static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
999 struct buffer_page **bpage)
1000{
1001 struct list_head *p = (*bpage)->list.next;
1002
1003 if (p == &cpu_buffer->pages)
1004 p = p->next;
1005
1006 *bpage = list_entry(p, struct buffer_page, list);
1007}
1008
1009static inline unsigned 1425static inline unsigned
1010rb_event_index(struct ring_buffer_event *event) 1426rb_event_index(struct ring_buffer_event *event)
1011{ 1427{
1012 unsigned long addr = (unsigned long)event; 1428 unsigned long addr = (unsigned long)event;
1013 1429
1014 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 1430 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1015} 1431}
1016 1432
1017static inline int 1433static inline int
1018rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 1434rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1019 struct ring_buffer_event *event) 1435 struct ring_buffer_event *event)
1020{ 1436{
1021 unsigned long addr = (unsigned long)event; 1437 unsigned long addr = (unsigned long)event;
1022 unsigned long index; 1438 unsigned long index;
@@ -1029,33 +1445,10 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1029} 1445}
1030 1446
1031static void 1447static void
1032rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
1033 struct ring_buffer_event *event)
1034{
1035 unsigned long addr = (unsigned long)event;
1036 unsigned long index;
1037
1038 index = rb_event_index(event);
1039 addr &= PAGE_MASK;
1040
1041 while (cpu_buffer->commit_page->page != (void *)addr) {
1042 if (RB_WARN_ON(cpu_buffer,
1043 cpu_buffer->commit_page == cpu_buffer->tail_page))
1044 return;
1045 cpu_buffer->commit_page->page->commit =
1046 cpu_buffer->commit_page->write;
1047 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1048 cpu_buffer->write_stamp =
1049 cpu_buffer->commit_page->page->time_stamp;
1050 }
1051
1052 /* Now set the commit to the event's index */
1053 local_set(&cpu_buffer->commit_page->page->commit, index);
1054}
1055
1056static void
1057rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 1448rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1058{ 1449{
1450 unsigned long max_count;
1451
1059 /* 1452 /*
1060 * We only race with interrupts and NMIs on this CPU. 1453 * We only race with interrupts and NMIs on this CPU.
1061 * If we own the commit event, then we can commit 1454 * If we own the commit event, then we can commit
@@ -1065,9 +1458,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1065 * assign the commit to the tail. 1458 * assign the commit to the tail.
1066 */ 1459 */
1067 again: 1460 again:
1461 max_count = cpu_buffer->buffer->pages * 100;
1462
1068 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1463 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1069 cpu_buffer->commit_page->page->commit = 1464 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1070 cpu_buffer->commit_page->write; 1465 return;
1466 if (RB_WARN_ON(cpu_buffer,
1467 rb_is_reader_page(cpu_buffer->tail_page)))
1468 return;
1469 local_set(&cpu_buffer->commit_page->page->commit,
1470 rb_page_write(cpu_buffer->commit_page));
1071 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 1471 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1072 cpu_buffer->write_stamp = 1472 cpu_buffer->write_stamp =
1073 cpu_buffer->commit_page->page->time_stamp; 1473 cpu_buffer->commit_page->page->time_stamp;
@@ -1076,8 +1476,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1076 } 1476 }
1077 while (rb_commit_index(cpu_buffer) != 1477 while (rb_commit_index(cpu_buffer) !=
1078 rb_page_write(cpu_buffer->commit_page)) { 1478 rb_page_write(cpu_buffer->commit_page)) {
1079 cpu_buffer->commit_page->page->commit = 1479
1080 cpu_buffer->commit_page->write; 1480 local_set(&cpu_buffer->commit_page->page->commit,
1481 rb_page_write(cpu_buffer->commit_page));
1482 RB_WARN_ON(cpu_buffer,
1483 local_read(&cpu_buffer->commit_page->page->commit) &
1484 ~RB_WRITE_MASK);
1081 barrier(); 1485 barrier();
1082 } 1486 }
1083 1487
@@ -1110,7 +1514,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1110 * to the head page instead of next. 1514 * to the head page instead of next.
1111 */ 1515 */
1112 if (iter->head_page == cpu_buffer->reader_page) 1516 if (iter->head_page == cpu_buffer->reader_page)
1113 iter->head_page = cpu_buffer->head_page; 1517 iter->head_page = rb_set_head_page(cpu_buffer);
1114 else 1518 else
1115 rb_inc_page(cpu_buffer, &iter->head_page); 1519 rb_inc_page(cpu_buffer, &iter->head_page);
1116 1520
@@ -1154,6 +1558,163 @@ rb_update_event(struct ring_buffer_event *event,
1154 } 1558 }
1155} 1559}
1156 1560
1561/*
1562 * rb_handle_head_page - writer hit the head page
1563 *
1564 * Returns: +1 to retry page
1565 * 0 to continue
1566 * -1 on error
1567 */
1568static int
1569rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1570 struct buffer_page *tail_page,
1571 struct buffer_page *next_page)
1572{
1573 struct buffer_page *new_head;
1574 int entries;
1575 int type;
1576 int ret;
1577
1578 entries = rb_page_entries(next_page);
1579
1580 /*
1581 * The hard part is here. We need to move the head
1582 * forward, and protect against both readers on
1583 * other CPUs and writers coming in via interrupts.
1584 */
1585 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1586 RB_PAGE_HEAD);
1587
1588 /*
1589 * type can be one of four:
1590 * NORMAL - an interrupt already moved it for us
1591 * HEAD - we are the first to get here.
1592 * UPDATE - we are the interrupt interrupting
1593 * a current move.
1594 * MOVED - a reader on another CPU moved the next
1595 * pointer to its reader page. Give up
1596 * and try again.
1597 */
1598
1599 switch (type) {
1600 case RB_PAGE_HEAD:
1601 /*
1602 * We changed the head to UPDATE, thus
1603 * it is our responsibility to update
1604 * the counters.
1605 */
1606 local_add(entries, &cpu_buffer->overrun);
1607
1608 /*
1609 * The entries will be zeroed out when we move the
1610 * tail page.
1611 */
1612
1613 /* still more to do */
1614 break;
1615
1616 case RB_PAGE_UPDATE:
1617 /*
1618 * This is an interrupt that interrupted the
1619 * previous update. Still more to do.
1620 */
1621 break;
1622 case RB_PAGE_NORMAL:
1623 /*
1624 * An interrupt came in before the update
1625 * and processed this for us.
1626 * Nothing left to do.
1627 */
1628 return 1;
1629 case RB_PAGE_MOVED:
1630 /*
1631 * The reader is on another CPU and just did
1632 * a swap with our next_page.
1633 * Try again.
1634 */
1635 return 1;
1636 default:
1637 RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
1638 return -1;
1639 }
1640
1641 /*
1642 * Now that we are here, the old head pointer is
1643 * set to UPDATE. This will keep the reader from
1644 * swapping the head page with the reader page.
1645 * The reader (on another CPU) will spin till
1646 * we are finished.
1647 *
1648 * We just need to protect against interrupts
1649 * doing the job. We will set the next pointer
1650 * to HEAD. After that, we set the old pointer
1651 * to NORMAL, but only if it was HEAD before.
1652 * otherwise we are an interrupt, and only
1653 * want the outer most commit to reset it.
1654 */
1655 new_head = next_page;
1656 rb_inc_page(cpu_buffer, &new_head);
1657
1658 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1659 RB_PAGE_NORMAL);
1660
1661 /*
1662 * Valid returns are:
1663 * HEAD - an interrupt came in and already set it.
1664 * NORMAL - One of two things:
1665 * 1) We really set it.
1666 * 2) A bunch of interrupts came in and moved
1667 * the page forward again.
1668 */
1669 switch (ret) {
1670 case RB_PAGE_HEAD:
1671 case RB_PAGE_NORMAL:
1672 /* OK */
1673 break;
1674 default:
1675 RB_WARN_ON(cpu_buffer, 1);
1676 return -1;
1677 }
1678
1679 /*
1680 * It is possible that an interrupt came in,
1681 * set the head up, then more interrupts came in
1682 * and moved it again. When we get back here,
1683 * the page would have been set to NORMAL but we
1684 * just set it back to HEAD.
1685 *
1686 * How do you detect this? Well, if that happened
1687 * the tail page would have moved.
1688 */
1689 if (ret == RB_PAGE_NORMAL) {
1690 /*
1691 * If the tail had moved past next, then we need
1692 * to reset the pointer.
1693 */
1694 if (cpu_buffer->tail_page != tail_page &&
1695 cpu_buffer->tail_page != next_page)
1696 rb_head_page_set_normal(cpu_buffer, new_head,
1697 next_page,
1698 RB_PAGE_HEAD);
1699 }
1700
1701 /*
1702 * If this was the outer most commit (the one that
1703 * changed the original pointer from HEAD to UPDATE),
1704 * then it is up to us to reset it to NORMAL.
1705 */
1706 if (type == RB_PAGE_HEAD) {
1707 ret = rb_head_page_set_normal(cpu_buffer, next_page,
1708 tail_page,
1709 RB_PAGE_UPDATE);
1710 if (RB_WARN_ON(cpu_buffer,
1711 ret != RB_PAGE_UPDATE))
1712 return -1;
1713 }
1714
1715 return 0;
1716}
1717
1157static unsigned rb_calculate_event_length(unsigned length) 1718static unsigned rb_calculate_event_length(unsigned length)
1158{ 1719{
1159 struct ring_buffer_event event; /* Used only for sizeof array */ 1720 struct ring_buffer_event event; /* Used only for sizeof array */
@@ -1171,6 +1732,57 @@ static unsigned rb_calculate_event_length(unsigned length)
1171 return length; 1732 return length;
1172} 1733}
1173 1734
1735static inline void
1736rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1737 struct buffer_page *tail_page,
1738 unsigned long tail, unsigned long length)
1739{
1740 struct ring_buffer_event *event;
1741
1742 /*
1743 * Only the event that crossed the page boundary
1744 * must fill the old tail_page with padding.
1745 */
1746 if (tail >= BUF_PAGE_SIZE) {
1747 local_sub(length, &tail_page->write);
1748 return;
1749 }
1750
1751 event = __rb_page_index(tail_page, tail);
1752 kmemcheck_annotate_bitfield(event, bitfield);
1753
1754 /*
1755 * If this event is bigger than the minimum size, then
1756 * we need to be careful that we don't subtract the
1757 * write counter enough to allow another writer to slip
1758 * in on this page.
1759 * We put in a discarded commit instead, to make sure
1760 * that this space is not used again.
1761 *
1762 * If we are less than the minimum size, we don't need to
1763 * worry about it.
1764 */
1765 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
1766 /* No room for any events */
1767
1768 /* Mark the rest of the page with padding */
1769 rb_event_set_padding(event);
1770
1771 /* Set the write back to the previous setting */
1772 local_sub(length, &tail_page->write);
1773 return;
1774 }
1775
1776 /* Put in a discarded event */
1777 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
1778 event->type_len = RINGBUF_TYPE_PADDING;
1779 /* time delta must be non zero */
1780 event->time_delta = 1;
1781
1782 /* Set write to end of buffer */
1783 length = (tail + length) - BUF_PAGE_SIZE;
1784 local_sub(length, &tail_page->write);
1785}
1174 1786
1175static struct ring_buffer_event * 1787static struct ring_buffer_event *
1176rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1788rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1178,128 +1790,101 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1178 struct buffer_page *commit_page, 1790 struct buffer_page *commit_page,
1179 struct buffer_page *tail_page, u64 *ts) 1791 struct buffer_page *tail_page, u64 *ts)
1180{ 1792{
1181 struct buffer_page *next_page, *head_page, *reader_page;
1182 struct ring_buffer *buffer = cpu_buffer->buffer; 1793 struct ring_buffer *buffer = cpu_buffer->buffer;
1183 struct ring_buffer_event *event; 1794 struct buffer_page *next_page;
1184 bool lock_taken = false; 1795 int ret;
1185 unsigned long flags;
1186 1796
1187 next_page = tail_page; 1797 next_page = tail_page;
1188 1798
1189 local_irq_save(flags);
1190 /*
1191 * Since the write to the buffer is still not
1192 * fully lockless, we must be careful with NMIs.
1193 * The locks in the writers are taken when a write
1194 * crosses to a new page. The locks protect against
1195 * races with the readers (this will soon be fixed
1196 * with a lockless solution).
1197 *
1198 * Because we can not protect against NMIs, and we
1199 * want to keep traces reentrant, we need to manage
1200 * what happens when we are in an NMI.
1201 *
1202 * NMIs can happen after we take the lock.
1203 * If we are in an NMI, only take the lock
1204 * if it is not already taken. Otherwise
1205 * simply fail.
1206 */
1207 if (unlikely(in_nmi())) {
1208 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1209 cpu_buffer->nmi_dropped++;
1210 goto out_reset;
1211 }
1212 } else
1213 __raw_spin_lock(&cpu_buffer->lock);
1214
1215 lock_taken = true;
1216
1217 rb_inc_page(cpu_buffer, &next_page); 1799 rb_inc_page(cpu_buffer, &next_page);
1218 1800
1219 head_page = cpu_buffer->head_page;
1220 reader_page = cpu_buffer->reader_page;
1221
1222 /* we grabbed the lock before incrementing */
1223 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1224 goto out_reset;
1225
1226 /* 1801 /*
1227 * If for some reason, we had an interrupt storm that made 1802 * If for some reason, we had an interrupt storm that made
1228 * it all the way around the buffer, bail, and warn 1803 * it all the way around the buffer, bail, and warn
1229 * about it. 1804 * about it.
1230 */ 1805 */
1231 if (unlikely(next_page == commit_page)) { 1806 if (unlikely(next_page == commit_page)) {
1232 cpu_buffer->commit_overrun++; 1807 local_inc(&cpu_buffer->commit_overrun);
1233 goto out_reset; 1808 goto out_reset;
1234 } 1809 }
1235 1810
1236 if (next_page == head_page) {
1237 if (!(buffer->flags & RB_FL_OVERWRITE))
1238 goto out_reset;
1239
1240 /* tail_page has not moved yet? */
1241 if (tail_page == cpu_buffer->tail_page) {
1242 /* count overflows */
1243 cpu_buffer->overrun +=
1244 local_read(&head_page->entries);
1245
1246 rb_inc_page(cpu_buffer, &head_page);
1247 cpu_buffer->head_page = head_page;
1248 cpu_buffer->head_page->read = 0;
1249 }
1250 }
1251
1252 /* 1811 /*
1253 * If the tail page is still the same as what we think 1812 * This is where the fun begins!
1254 * it is, then it is up to us to update the tail 1813 *
1255 * pointer. 1814 * We are fighting against races between a reader that
1815 * could be on another CPU trying to swap its reader
1816 * page with the buffer head.
1817 *
1818 * We are also fighting against interrupts coming in and
1819 * moving the head or tail on us as well.
1820 *
1821 * If the next page is the head page then we have filled
1822 * the buffer, unless the commit page is still on the
1823 * reader page.
1256 */ 1824 */
1257 if (tail_page == cpu_buffer->tail_page) { 1825 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
1258 local_set(&next_page->write, 0);
1259 local_set(&next_page->entries, 0);
1260 local_set(&next_page->page->commit, 0);
1261 cpu_buffer->tail_page = next_page;
1262 1826
1263 /* reread the time stamp */ 1827 /*
1264 *ts = rb_time_stamp(buffer, cpu_buffer->cpu); 1828 * If the commit is not on the reader page, then
1265 cpu_buffer->tail_page->page->time_stamp = *ts; 1829 * move the header page.
1830 */
1831 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
1832 /*
1833 * If we are not in overwrite mode,
1834 * this is easy, just stop here.
1835 */
1836 if (!(buffer->flags & RB_FL_OVERWRITE))
1837 goto out_reset;
1838
1839 ret = rb_handle_head_page(cpu_buffer,
1840 tail_page,
1841 next_page);
1842 if (ret < 0)
1843 goto out_reset;
1844 if (ret)
1845 goto out_again;
1846 } else {
1847 /*
1848 * We need to be careful here too. The
1849 * commit page could still be on the reader
1850 * page. We could have a small buffer, and
1851 * have filled up the buffer with events
1852 * from interrupts and such, and wrapped.
1853 *
1854 * Note, if the tail page is also on the
1855 * reader_page, we let it move out.
1856 */
1857 if (unlikely((cpu_buffer->commit_page !=
1858 cpu_buffer->tail_page) &&
1859 (cpu_buffer->commit_page ==
1860 cpu_buffer->reader_page))) {
1861 local_inc(&cpu_buffer->commit_overrun);
1862 goto out_reset;
1863 }
1864 }
1266 } 1865 }
1267 1866
1268 /* 1867 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
1269 * The actual tail page has moved forward. 1868 if (ret) {
1270 */ 1869 /*
1271 if (tail < BUF_PAGE_SIZE) { 1870 * Nested commits always have zero deltas, so
1272 /* Mark the rest of the page with padding */ 1871 * just reread the time stamp
1273 event = __rb_page_index(tail_page, tail); 1872 */
1274 kmemcheck_annotate_bitfield(event, bitfield); 1873 *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
1275 rb_event_set_padding(event); 1874 next_page->page->time_stamp = *ts;
1276 } 1875 }
1277 1876
1278 /* Set the write back to the previous setting */ 1877 out_again:
1279 local_sub(length, &tail_page->write);
1280 1878
1281 /* 1879 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1282 * If this was a commit entry that failed,
1283 * increment that too
1284 */
1285 if (tail_page == cpu_buffer->commit_page &&
1286 tail == rb_commit_index(cpu_buffer)) {
1287 rb_set_commit_to_write(cpu_buffer);
1288 }
1289
1290 __raw_spin_unlock(&cpu_buffer->lock);
1291 local_irq_restore(flags);
1292 1880
1293 /* fail and let the caller try again */ 1881 /* fail and let the caller try again */
1294 return ERR_PTR(-EAGAIN); 1882 return ERR_PTR(-EAGAIN);
1295 1883
1296 out_reset: 1884 out_reset:
1297 /* reset write */ 1885 /* reset write */
1298 local_sub(length, &tail_page->write); 1886 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1299 1887
1300 if (likely(lock_taken))
1301 __raw_spin_unlock(&cpu_buffer->lock);
1302 local_irq_restore(flags);
1303 return NULL; 1888 return NULL;
1304} 1889}
1305 1890
@@ -1316,6 +1901,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1316 barrier(); 1901 barrier();
1317 tail_page = cpu_buffer->tail_page; 1902 tail_page = cpu_buffer->tail_page;
1318 write = local_add_return(length, &tail_page->write); 1903 write = local_add_return(length, &tail_page->write);
1904
1905 /* set write to only the index of the write */
1906 write &= RB_WRITE_MASK;
1319 tail = write - length; 1907 tail = write - length;
1320 1908
1321 /* See if we shot past the end of this buffer page */ 1909 /* See if we shot past the end of this buffer page */
@@ -1325,9 +1913,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1325 1913
1326 /* We reserved something on the buffer */ 1914 /* We reserved something on the buffer */
1327 1915
1328 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1329 return NULL;
1330
1331 event = __rb_page_index(tail_page, tail); 1916 event = __rb_page_index(tail_page, tail);
1332 kmemcheck_annotate_bitfield(event, bitfield); 1917 kmemcheck_annotate_bitfield(event, bitfield);
1333 rb_update_event(event, type, length); 1918 rb_update_event(event, type, length);
@@ -1337,11 +1922,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1337 local_inc(&tail_page->entries); 1922 local_inc(&tail_page->entries);
1338 1923
1339 /* 1924 /*
1340 * If this is a commit and the tail is zero, then update 1925 * If this is the first commit on the page, then update
1341 * this page's time stamp. 1926 * its timestamp.
1342 */ 1927 */
1343 if (!tail && rb_is_commit(cpu_buffer, event)) 1928 if (!tail)
1344 cpu_buffer->commit_page->page->time_stamp = *ts; 1929 tail_page->page->time_stamp = *ts;
1345 1930
1346 return event; 1931 return event;
1347} 1932}
@@ -1363,12 +1948,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1363 bpage = cpu_buffer->tail_page; 1948 bpage = cpu_buffer->tail_page;
1364 1949
1365 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 1950 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
1951 unsigned long write_mask =
1952 local_read(&bpage->write) & ~RB_WRITE_MASK;
1366 /* 1953 /*
1367 * This is on the tail page. It is possible that 1954 * This is on the tail page. It is possible that
1368 * a write could come in and move the tail page 1955 * a write could come in and move the tail page
1369 * and write to the next page. That is fine 1956 * and write to the next page. That is fine
1370 * because we just shorten what is on this page. 1957 * because we just shorten what is on this page.
1371 */ 1958 */
1959 old_index += write_mask;
1960 new_index += write_mask;
1372 index = local_cmpxchg(&bpage->write, old_index, new_index); 1961 index = local_cmpxchg(&bpage->write, old_index, new_index);
1373 if (index == old_index) 1962 if (index == old_index)
1374 return 1; 1963 return 1;
@@ -1410,16 +1999,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1410 return -EAGAIN; 1999 return -EAGAIN;
1411 2000
1412 /* Only a committed time event can update the write stamp */ 2001 /* Only a committed time event can update the write stamp */
1413 if (rb_is_commit(cpu_buffer, event)) { 2002 if (rb_event_is_commit(cpu_buffer, event)) {
1414 /* 2003 /*
1415 * If this is the first on the page, then we need to 2004 * If this is the first on the page, then it was
1416 * update the page itself, and just put in a zero. 2005 * updated with the page itself. Try to discard it
2006 * and if we can't just make it zero.
1417 */ 2007 */
1418 if (rb_event_index(event)) { 2008 if (rb_event_index(event)) {
1419 event->time_delta = *delta & TS_MASK; 2009 event->time_delta = *delta & TS_MASK;
1420 event->array[0] = *delta >> TS_SHIFT; 2010 event->array[0] = *delta >> TS_SHIFT;
1421 } else { 2011 } else {
1422 cpu_buffer->commit_page->page->time_stamp = *ts;
1423 /* try to discard, since we do not need this */ 2012 /* try to discard, since we do not need this */
1424 if (!rb_try_to_discard(cpu_buffer, event)) { 2013 if (!rb_try_to_discard(cpu_buffer, event)) {
1425 /* nope, just zero it */ 2014 /* nope, just zero it */
@@ -1445,8 +2034,47 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1445 return ret; 2034 return ret;
1446} 2035}
1447 2036
2037static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2038{
2039 local_inc(&cpu_buffer->committing);
2040 local_inc(&cpu_buffer->commits);
2041}
2042
2043static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2044{
2045 unsigned long commits;
2046
2047 if (RB_WARN_ON(cpu_buffer,
2048 !local_read(&cpu_buffer->committing)))
2049 return;
2050
2051 again:
2052 commits = local_read(&cpu_buffer->commits);
2053 /* synchronize with interrupts */
2054 barrier();
2055 if (local_read(&cpu_buffer->committing) == 1)
2056 rb_set_commit_to_write(cpu_buffer);
2057
2058 local_dec(&cpu_buffer->committing);
2059
2060 /* synchronize with interrupts */
2061 barrier();
2062
2063 /*
2064 * Need to account for interrupts coming in between the
2065 * updating of the commit page and the clearing of the
2066 * committing counter.
2067 */
2068 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2069 !local_read(&cpu_buffer->committing)) {
2070 local_inc(&cpu_buffer->committing);
2071 goto again;
2072 }
2073}
2074
1448static struct ring_buffer_event * 2075static struct ring_buffer_event *
1449rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 2076rb_reserve_next_event(struct ring_buffer *buffer,
2077 struct ring_buffer_per_cpu *cpu_buffer,
1450 unsigned long length) 2078 unsigned long length)
1451{ 2079{
1452 struct ring_buffer_event *event; 2080 struct ring_buffer_event *event;
@@ -1454,6 +2082,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1454 int commit = 0; 2082 int commit = 0;
1455 int nr_loops = 0; 2083 int nr_loops = 0;
1456 2084
2085 rb_start_commit(cpu_buffer);
2086
2087#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2088 /*
2089 * Due to the ability to swap a cpu buffer from a buffer
2090 * it is possible it was swapped before we committed.
2091 * (committing stops a swap). We check for it here and
2092 * if it happened, we have to fail the write.
2093 */
2094 barrier();
2095 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2096 local_dec(&cpu_buffer->committing);
2097 local_dec(&cpu_buffer->commits);
2098 return NULL;
2099 }
2100#endif
2101
1457 length = rb_calculate_event_length(length); 2102 length = rb_calculate_event_length(length);
1458 again: 2103 again:
1459 /* 2104 /*
@@ -1466,7 +2111,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1466 * Bail! 2111 * Bail!
1467 */ 2112 */
1468 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 2113 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1469 return NULL; 2114 goto out_fail;
1470 2115
1471 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); 2116 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
1472 2117
@@ -1497,7 +2142,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1497 2142
1498 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2143 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1499 if (commit == -EBUSY) 2144 if (commit == -EBUSY)
1500 return NULL; 2145 goto out_fail;
1501 2146
1502 if (commit == -EAGAIN) 2147 if (commit == -EAGAIN)
1503 goto again; 2148 goto again;
@@ -1511,30 +2156,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1511 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2156 if (unlikely(PTR_ERR(event) == -EAGAIN))
1512 goto again; 2157 goto again;
1513 2158
1514 if (!event) { 2159 if (!event)
1515 if (unlikely(commit)) 2160 goto out_fail;
1516 /*
1517 * Ouch! We needed a timestamp and it was commited. But
1518 * we didn't get our event reserved.
1519 */
1520 rb_set_commit_to_write(cpu_buffer);
1521 return NULL;
1522 }
1523 2161
1524 /* 2162 if (!rb_event_is_commit(cpu_buffer, event))
1525 * If the timestamp was commited, make the commit our entry
1526 * now so that we will update it when needed.
1527 */
1528 if (unlikely(commit))
1529 rb_set_commit_event(cpu_buffer, event);
1530 else if (!rb_is_commit(cpu_buffer, event))
1531 delta = 0; 2163 delta = 0;
1532 2164
1533 event->time_delta = delta; 2165 event->time_delta = delta;
1534 2166
1535 return event; 2167 return event;
2168
2169 out_fail:
2170 rb_end_commit(cpu_buffer);
2171 return NULL;
1536} 2172}
1537 2173
2174#ifdef CONFIG_TRACING
2175
1538#define TRACE_RECURSIVE_DEPTH 16 2176#define TRACE_RECURSIVE_DEPTH 16
1539 2177
1540static int trace_recursive_lock(void) 2178static int trace_recursive_lock(void)
@@ -1565,6 +2203,13 @@ static void trace_recursive_unlock(void)
1565 current->trace_recursion--; 2203 current->trace_recursion--;
1566} 2204}
1567 2205
2206#else
2207
2208#define trace_recursive_lock() (0)
2209#define trace_recursive_unlock() do { } while (0)
2210
2211#endif
2212
1568static DEFINE_PER_CPU(int, rb_need_resched); 2213static DEFINE_PER_CPU(int, rb_need_resched);
1569 2214
1570/** 2215/**
@@ -1614,7 +2259,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1614 if (length > BUF_MAX_DATA_SIZE) 2259 if (length > BUF_MAX_DATA_SIZE)
1615 goto out; 2260 goto out;
1616 2261
1617 event = rb_reserve_next_event(cpu_buffer, length); 2262 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1618 if (!event) 2263 if (!event)
1619 goto out; 2264 goto out;
1620 2265
@@ -1637,18 +2282,24 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1637} 2282}
1638EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2283EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1639 2284
2285static void
2286rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2287 struct ring_buffer_event *event)
2288{
2289 /*
2290 * The event first in the commit queue updates the
2291 * time stamp.
2292 */
2293 if (rb_event_is_commit(cpu_buffer, event))
2294 cpu_buffer->write_stamp += event->time_delta;
2295}
2296
1640static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2297static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1641 struct ring_buffer_event *event) 2298 struct ring_buffer_event *event)
1642{ 2299{
1643 local_inc(&cpu_buffer->entries); 2300 local_inc(&cpu_buffer->entries);
1644 2301 rb_update_write_stamp(cpu_buffer, event);
1645 /* Only process further if we own the commit */ 2302 rb_end_commit(cpu_buffer);
1646 if (!rb_is_commit(cpu_buffer, event))
1647 return;
1648
1649 cpu_buffer->write_stamp += event->time_delta;
1650
1651 rb_set_commit_to_write(cpu_buffer);
1652} 2303}
1653 2304
1654/** 2305/**
@@ -1694,32 +2345,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
1694 event->time_delta = 1; 2345 event->time_delta = 1;
1695} 2346}
1696 2347
1697/** 2348/*
1698 * ring_buffer_event_discard - discard any event in the ring buffer 2349 * Decrement the entries to the page that an event is on.
1699 * @event: the event to discard 2350 * The event does not even need to exist, only the pointer
1700 * 2351 * to the page it is on. This may only be called before the commit
1701 * Sometimes a event that is in the ring buffer needs to be ignored. 2352 * takes place.
1702 * This function lets the user discard an event in the ring buffer
1703 * and then that event will not be read later.
1704 *
1705 * Note, it is up to the user to be careful with this, and protect
1706 * against races. If the user discards an event that has been consumed
1707 * it is possible that it could corrupt the ring buffer.
1708 */ 2353 */
1709void ring_buffer_event_discard(struct ring_buffer_event *event) 2354static inline void
2355rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2356 struct ring_buffer_event *event)
1710{ 2357{
1711 rb_event_discard(event); 2358 unsigned long addr = (unsigned long)event;
2359 struct buffer_page *bpage = cpu_buffer->commit_page;
2360 struct buffer_page *start;
2361
2362 addr &= PAGE_MASK;
2363
2364 /* Do the likely case first */
2365 if (likely(bpage->page == (void *)addr)) {
2366 local_dec(&bpage->entries);
2367 return;
2368 }
2369
2370 /*
2371 * Because the commit page may be on the reader page we
2372 * start with the next page and check the end loop there.
2373 */
2374 rb_inc_page(cpu_buffer, &bpage);
2375 start = bpage;
2376 do {
2377 if (bpage->page == (void *)addr) {
2378 local_dec(&bpage->entries);
2379 return;
2380 }
2381 rb_inc_page(cpu_buffer, &bpage);
2382 } while (bpage != start);
2383
2384 /* commit not part of this buffer?? */
2385 RB_WARN_ON(cpu_buffer, 1);
1712} 2386}
1713EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
1714 2387
1715/** 2388/**
1716 * ring_buffer_commit_discard - discard an event that has not been committed 2389 * ring_buffer_commit_discard - discard an event that has not been committed
1717 * @buffer: the ring buffer 2390 * @buffer: the ring buffer
1718 * @event: non committed event to discard 2391 * @event: non committed event to discard
1719 * 2392 *
1720 * This is similar to ring_buffer_event_discard but must only be 2393 * Sometimes an event that is in the ring buffer needs to be ignored.
1721 * performed on an event that has not been committed yet. The difference 2394 * This function lets the user discard an event in the ring buffer
1722 * is that this will also try to free the event from the ring buffer 2395 * and then that event will not be read later.
2396 *
2397 * This function only works if it is called before the item has been
2398 * committed. It will try to free the event from the ring buffer
1723 * if another event has not been added behind it. 2399 * if another event has not been added behind it.
1724 * 2400 *
1725 * If another event has been added behind it, it will set the event 2401 * If another event has been added behind it, it will set the event
@@ -1737,32 +2413,27 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1737 /* The event is discarded regardless */ 2413 /* The event is discarded regardless */
1738 rb_event_discard(event); 2414 rb_event_discard(event);
1739 2415
2416 cpu = smp_processor_id();
2417 cpu_buffer = buffer->buffers[cpu];
2418
1740 /* 2419 /*
1741 * This must only be called if the event has not been 2420 * This must only be called if the event has not been
1742 * committed yet. Thus we can assume that preemption 2421 * committed yet. Thus we can assume that preemption
1743 * is still disabled. 2422 * is still disabled.
1744 */ 2423 */
1745 RB_WARN_ON(buffer, preemptible()); 2424 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1746 2425
1747 cpu = smp_processor_id(); 2426 rb_decrement_entry(cpu_buffer, event);
1748 cpu_buffer = buffer->buffers[cpu]; 2427 if (rb_try_to_discard(cpu_buffer, event))
1749
1750 if (!rb_try_to_discard(cpu_buffer, event))
1751 goto out; 2428 goto out;
1752 2429
1753 /* 2430 /*
1754 * The commit is still visible by the reader, so we 2431 * The commit is still visible by the reader, so we
1755 * must increment entries. 2432 * must still update the timestamp.
1756 */ 2433 */
1757 local_inc(&cpu_buffer->entries); 2434 rb_update_write_stamp(cpu_buffer, event);
1758 out: 2435 out:
1759 /* 2436 rb_end_commit(cpu_buffer);
1760 * If a write came in and pushed the tail page
1761 * we still need to update the commit pointer
1762 * if we were the commit.
1763 */
1764 if (rb_is_commit(cpu_buffer, event))
1765 rb_set_commit_to_write(cpu_buffer);
1766 2437
1767 trace_recursive_unlock(); 2438 trace_recursive_unlock();
1768 2439
@@ -1821,7 +2492,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1821 if (length > BUF_MAX_DATA_SIZE) 2492 if (length > BUF_MAX_DATA_SIZE)
1822 goto out; 2493 goto out;
1823 2494
1824 event = rb_reserve_next_event(cpu_buffer, length); 2495 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1825 if (!event) 2496 if (!event)
1826 goto out; 2497 goto out;
1827 2498
@@ -1842,9 +2513,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write);
1842static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 2513static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1843{ 2514{
1844 struct buffer_page *reader = cpu_buffer->reader_page; 2515 struct buffer_page *reader = cpu_buffer->reader_page;
1845 struct buffer_page *head = cpu_buffer->head_page; 2516 struct buffer_page *head = rb_set_head_page(cpu_buffer);
1846 struct buffer_page *commit = cpu_buffer->commit_page; 2517 struct buffer_page *commit = cpu_buffer->commit_page;
1847 2518
2519 /* In case of error, head will be NULL */
2520 if (unlikely(!head))
2521 return 1;
2522
1848 return reader->read == rb_page_commit(reader) && 2523 return reader->read == rb_page_commit(reader) &&
1849 (commit == reader || 2524 (commit == reader ||
1850 (commit == head && 2525 (commit == head &&
@@ -1935,7 +2610,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1935 return 0; 2610 return 0;
1936 2611
1937 cpu_buffer = buffer->buffers[cpu]; 2612 cpu_buffer = buffer->buffers[cpu];
1938 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) 2613 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
1939 - cpu_buffer->read; 2614 - cpu_buffer->read;
1940 2615
1941 return ret; 2616 return ret;
@@ -1956,33 +2631,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1956 return 0; 2631 return 0;
1957 2632
1958 cpu_buffer = buffer->buffers[cpu]; 2633 cpu_buffer = buffer->buffers[cpu];
1959 ret = cpu_buffer->overrun; 2634 ret = local_read(&cpu_buffer->overrun);
1960 2635
1961 return ret; 2636 return ret;
1962} 2637}
1963EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 2638EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1964 2639
1965/** 2640/**
1966 * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
1967 * @buffer: The ring buffer
1968 * @cpu: The per CPU buffer to get the number of overruns from
1969 */
1970unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
1971{
1972 struct ring_buffer_per_cpu *cpu_buffer;
1973 unsigned long ret;
1974
1975 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1976 return 0;
1977
1978 cpu_buffer = buffer->buffers[cpu];
1979 ret = cpu_buffer->nmi_dropped;
1980
1981 return ret;
1982}
1983EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
1984
1985/**
1986 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits 2641 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
1987 * @buffer: The ring buffer 2642 * @buffer: The ring buffer
1988 * @cpu: The per CPU buffer to get the number of overruns from 2643 * @cpu: The per CPU buffer to get the number of overruns from
@@ -1997,7 +2652,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
1997 return 0; 2652 return 0;
1998 2653
1999 cpu_buffer = buffer->buffers[cpu]; 2654 cpu_buffer = buffer->buffers[cpu];
2000 ret = cpu_buffer->commit_overrun; 2655 ret = local_read(&cpu_buffer->commit_overrun);
2001 2656
2002 return ret; 2657 return ret;
2003} 2658}
@@ -2020,7 +2675,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2020 for_each_buffer_cpu(buffer, cpu) { 2675 for_each_buffer_cpu(buffer, cpu) {
2021 cpu_buffer = buffer->buffers[cpu]; 2676 cpu_buffer = buffer->buffers[cpu];
2022 entries += (local_read(&cpu_buffer->entries) - 2677 entries += (local_read(&cpu_buffer->entries) -
2023 cpu_buffer->overrun) - cpu_buffer->read; 2678 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2024 } 2679 }
2025 2680
2026 return entries; 2681 return entries;
@@ -2043,7 +2698,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
2043 /* if you care about this being correct, lock the buffer */ 2698 /* if you care about this being correct, lock the buffer */
2044 for_each_buffer_cpu(buffer, cpu) { 2699 for_each_buffer_cpu(buffer, cpu) {
2045 cpu_buffer = buffer->buffers[cpu]; 2700 cpu_buffer = buffer->buffers[cpu];
2046 overruns += cpu_buffer->overrun; 2701 overruns += local_read(&cpu_buffer->overrun);
2047 } 2702 }
2048 2703
2049 return overruns; 2704 return overruns;
@@ -2056,8 +2711,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2056 2711
2057 /* Iterator usage is expected to have record disabled */ 2712 /* Iterator usage is expected to have record disabled */
2058 if (list_empty(&cpu_buffer->reader_page->list)) { 2713 if (list_empty(&cpu_buffer->reader_page->list)) {
2059 iter->head_page = cpu_buffer->head_page; 2714 iter->head_page = rb_set_head_page(cpu_buffer);
2060 iter->head = cpu_buffer->head_page->read; 2715 if (unlikely(!iter->head_page))
2716 return;
2717 iter->head = iter->head_page->read;
2061 } else { 2718 } else {
2062 iter->head_page = cpu_buffer->reader_page; 2719 iter->head_page = cpu_buffer->reader_page;
2063 iter->head = cpu_buffer->reader_page->read; 2720 iter->head = cpu_buffer->reader_page->read;
@@ -2174,6 +2831,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2174 struct buffer_page *reader = NULL; 2831 struct buffer_page *reader = NULL;
2175 unsigned long flags; 2832 unsigned long flags;
2176 int nr_loops = 0; 2833 int nr_loops = 0;
2834 int ret;
2177 2835
2178 local_irq_save(flags); 2836 local_irq_save(flags);
2179 __raw_spin_lock(&cpu_buffer->lock); 2837 __raw_spin_lock(&cpu_buffer->lock);
@@ -2207,30 +2865,56 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2207 goto out; 2865 goto out;
2208 2866
2209 /* 2867 /*
2210 * Splice the empty reader page into the list around the head.
2211 * Reset the reader page to size zero. 2868 * Reset the reader page to size zero.
2212 */ 2869 */
2870 local_set(&cpu_buffer->reader_page->write, 0);
2871 local_set(&cpu_buffer->reader_page->entries, 0);
2872 local_set(&cpu_buffer->reader_page->page->commit, 0);
2213 2873
2214 reader = cpu_buffer->head_page; 2874 spin:
2875 /*
2876 * Splice the empty reader page into the list around the head.
2877 */
2878 reader = rb_set_head_page(cpu_buffer);
2215 cpu_buffer->reader_page->list.next = reader->list.next; 2879 cpu_buffer->reader_page->list.next = reader->list.next;
2216 cpu_buffer->reader_page->list.prev = reader->list.prev; 2880 cpu_buffer->reader_page->list.prev = reader->list.prev;
2217 2881
2218 local_set(&cpu_buffer->reader_page->write, 0); 2882 /*
2219 local_set(&cpu_buffer->reader_page->entries, 0); 2883 * cpu_buffer->pages just needs to point to the buffer, it
2220 local_set(&cpu_buffer->reader_page->page->commit, 0); 2884 * has no specific buffer page to point to. Lets move it out
2885 * of our way so we don't accidentally swap it.
2886 */
2887 cpu_buffer->pages = reader->list.prev;
2221 2888
2222 /* Make the reader page now replace the head */ 2889 /* The reader page will be pointing to the new head */
2223 reader->list.prev->next = &cpu_buffer->reader_page->list; 2890 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2224 reader->list.next->prev = &cpu_buffer->reader_page->list; 2891
2892 /*
2893 * Here's the tricky part.
2894 *
2895 * We need to move the pointer past the header page.
2896 * But we can only do that if a writer is not currently
2897 * moving it. The page before the header page has the
2898 * flag bit '1' set if it is pointing to the page we want.
2899 * But if the writer is in the process of moving it
2900 * then it will be '2' or already moved '0'.
2901 */
2902
2903 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
2225 2904
2226 /* 2905 /*
2227 * If the tail is on the reader, then we must set the head 2906 * If we did not convert it, then we must try again.
2228 * to the inserted page, otherwise we set it one before.
2229 */ 2907 */
2230 cpu_buffer->head_page = cpu_buffer->reader_page; 2908 if (!ret)
2909 goto spin;
2231 2910
2232 if (cpu_buffer->commit_page != reader) 2911 /*
2233 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2912 * Yeah! We succeeded in replacing the page.
2913 *
2914 * Now make the new head point back to the reader page.
2915 */
2916 reader->list.next->prev = &cpu_buffer->reader_page->list;
2917 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2234 2918
2235 /* Finally update the reader page to the new head */ 2919 /* Finally update the reader page to the new head */
2236 cpu_buffer->reader_page = reader; 2920 cpu_buffer->reader_page = reader;
@@ -2259,8 +2943,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2259 2943
2260 event = rb_reader_event(cpu_buffer); 2944 event = rb_reader_event(cpu_buffer);
2261 2945
2262 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX 2946 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
2263 || rb_discarded_event(event))
2264 cpu_buffer->read++; 2947 cpu_buffer->read++;
2265 2948
2266 rb_update_read_stamp(cpu_buffer, event); 2949 rb_update_read_stamp(cpu_buffer, event);
@@ -2351,7 +3034,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2351 * the box. Return the padding, and we will release 3034 * the box. Return the padding, and we will release
2352 * the current locks, and try again. 3035 * the current locks, and try again.
2353 */ 3036 */
2354 rb_advance_reader(cpu_buffer);
2355 return event; 3037 return event;
2356 3038
2357 case RINGBUF_TYPE_TIME_EXTEND: 3039 case RINGBUF_TYPE_TIME_EXTEND:
@@ -2446,6 +3128,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2446} 3128}
2447EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); 3129EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
2448 3130
3131static inline int rb_ok_to_lock(void)
3132{
3133 /*
3134 * If an NMI die dumps out the content of the ring buffer
3135 * do not grab locks. We also permanently disable the ring
3136 * buffer too. A one time deal is all you get from reading
3137 * the ring buffer from an NMI.
3138 */
3139 if (likely(!in_nmi()))
3140 return 1;
3141
3142 tracing_off_permanent();
3143 return 0;
3144}
3145
2449/** 3146/**
2450 * ring_buffer_peek - peek at the next event to be read 3147 * ring_buffer_peek - peek at the next event to be read
2451 * @buffer: The ring buffer to read 3148 * @buffer: The ring buffer to read
@@ -2461,19 +3158,25 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2461 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 3158 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2462 struct ring_buffer_event *event; 3159 struct ring_buffer_event *event;
2463 unsigned long flags; 3160 unsigned long flags;
3161 int dolock;
2464 3162
2465 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3163 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2466 return NULL; 3164 return NULL;
2467 3165
3166 dolock = rb_ok_to_lock();
2468 again: 3167 again:
2469 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3168 local_irq_save(flags);
3169 if (dolock)
3170 spin_lock(&cpu_buffer->reader_lock);
2470 event = rb_buffer_peek(buffer, cpu, ts); 3171 event = rb_buffer_peek(buffer, cpu, ts);
2471 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3172 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3173 rb_advance_reader(cpu_buffer);
3174 if (dolock)
3175 spin_unlock(&cpu_buffer->reader_lock);
3176 local_irq_restore(flags);
2472 3177
2473 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3178 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2474 cpu_relax();
2475 goto again; 3179 goto again;
2476 }
2477 3180
2478 return event; 3181 return event;
2479} 3182}
@@ -2498,10 +3201,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2498 event = rb_iter_peek(iter, ts); 3201 event = rb_iter_peek(iter, ts);
2499 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3202 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2500 3203
2501 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3204 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2502 cpu_relax();
2503 goto again; 3205 goto again;
2504 }
2505 3206
2506 return event; 3207 return event;
2507} 3208}
@@ -2520,6 +3221,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2520 struct ring_buffer_per_cpu *cpu_buffer; 3221 struct ring_buffer_per_cpu *cpu_buffer;
2521 struct ring_buffer_event *event = NULL; 3222 struct ring_buffer_event *event = NULL;
2522 unsigned long flags; 3223 unsigned long flags;
3224 int dolock;
3225
3226 dolock = rb_ok_to_lock();
2523 3227
2524 again: 3228 again:
2525 /* might be called in atomic */ 3229 /* might be called in atomic */
@@ -2529,24 +3233,23 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2529 goto out; 3233 goto out;
2530 3234
2531 cpu_buffer = buffer->buffers[cpu]; 3235 cpu_buffer = buffer->buffers[cpu];
2532 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3236 local_irq_save(flags);
3237 if (dolock)
3238 spin_lock(&cpu_buffer->reader_lock);
2533 3239
2534 event = rb_buffer_peek(buffer, cpu, ts); 3240 event = rb_buffer_peek(buffer, cpu, ts);
2535 if (!event) 3241 if (event)
2536 goto out_unlock; 3242 rb_advance_reader(cpu_buffer);
2537
2538 rb_advance_reader(cpu_buffer);
2539 3243
2540 out_unlock: 3244 if (dolock)
2541 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3245 spin_unlock(&cpu_buffer->reader_lock);
3246 local_irq_restore(flags);
2542 3247
2543 out: 3248 out:
2544 preempt_enable(); 3249 preempt_enable();
2545 3250
2546 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3251 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2547 cpu_relax();
2548 goto again; 3252 goto again;
2549 }
2550 3253
2551 return event; 3254 return event;
2552} 3255}
@@ -2626,21 +3329,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2626 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3329 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2627 unsigned long flags; 3330 unsigned long flags;
2628 3331
2629 again:
2630 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3332 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3333 again:
2631 event = rb_iter_peek(iter, ts); 3334 event = rb_iter_peek(iter, ts);
2632 if (!event) 3335 if (!event)
2633 goto out; 3336 goto out;
2634 3337
3338 if (event->type_len == RINGBUF_TYPE_PADDING)
3339 goto again;
3340
2635 rb_advance_iter(iter); 3341 rb_advance_iter(iter);
2636 out: 3342 out:
2637 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3343 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2638 3344
2639 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2640 cpu_relax();
2641 goto again;
2642 }
2643
2644 return event; 3345 return event;
2645} 3346}
2646EXPORT_SYMBOL_GPL(ring_buffer_read); 3347EXPORT_SYMBOL_GPL(ring_buffer_read);
@@ -2658,8 +3359,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size);
2658static void 3359static void
2659rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 3360rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2660{ 3361{
3362 rb_head_page_deactivate(cpu_buffer);
3363
2661 cpu_buffer->head_page 3364 cpu_buffer->head_page
2662 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 3365 = list_entry(cpu_buffer->pages, struct buffer_page, list);
2663 local_set(&cpu_buffer->head_page->write, 0); 3366 local_set(&cpu_buffer->head_page->write, 0);
2664 local_set(&cpu_buffer->head_page->entries, 0); 3367 local_set(&cpu_buffer->head_page->entries, 0);
2665 local_set(&cpu_buffer->head_page->page->commit, 0); 3368 local_set(&cpu_buffer->head_page->page->commit, 0);
@@ -2675,14 +3378,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2675 local_set(&cpu_buffer->reader_page->page->commit, 0); 3378 local_set(&cpu_buffer->reader_page->page->commit, 0);
2676 cpu_buffer->reader_page->read = 0; 3379 cpu_buffer->reader_page->read = 0;
2677 3380
2678 cpu_buffer->nmi_dropped = 0; 3381 local_set(&cpu_buffer->commit_overrun, 0);
2679 cpu_buffer->commit_overrun = 0; 3382 local_set(&cpu_buffer->overrun, 0);
2680 cpu_buffer->overrun = 0;
2681 cpu_buffer->read = 0;
2682 local_set(&cpu_buffer->entries, 0); 3383 local_set(&cpu_buffer->entries, 0);
3384 local_set(&cpu_buffer->committing, 0);
3385 local_set(&cpu_buffer->commits, 0);
3386 cpu_buffer->read = 0;
2683 3387
2684 cpu_buffer->write_stamp = 0; 3388 cpu_buffer->write_stamp = 0;
2685 cpu_buffer->read_stamp = 0; 3389 cpu_buffer->read_stamp = 0;
3390
3391 rb_head_page_activate(cpu_buffer);
2686} 3392}
2687 3393
2688/** 3394/**
@@ -2702,12 +3408,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2702 3408
2703 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3409 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2704 3410
3411 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3412 goto out;
3413
2705 __raw_spin_lock(&cpu_buffer->lock); 3414 __raw_spin_lock(&cpu_buffer->lock);
2706 3415
2707 rb_reset_cpu(cpu_buffer); 3416 rb_reset_cpu(cpu_buffer);
2708 3417
2709 __raw_spin_unlock(&cpu_buffer->lock); 3418 __raw_spin_unlock(&cpu_buffer->lock);
2710 3419
3420 out:
2711 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3421 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2712 3422
2713 atomic_dec(&cpu_buffer->record_disabled); 3423 atomic_dec(&cpu_buffer->record_disabled);
@@ -2734,12 +3444,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
2734int ring_buffer_empty(struct ring_buffer *buffer) 3444int ring_buffer_empty(struct ring_buffer *buffer)
2735{ 3445{
2736 struct ring_buffer_per_cpu *cpu_buffer; 3446 struct ring_buffer_per_cpu *cpu_buffer;
3447 unsigned long flags;
3448 int dolock;
2737 int cpu; 3449 int cpu;
3450 int ret;
3451
3452 dolock = rb_ok_to_lock();
2738 3453
2739 /* yes this is racy, but if you don't like the race, lock the buffer */ 3454 /* yes this is racy, but if you don't like the race, lock the buffer */
2740 for_each_buffer_cpu(buffer, cpu) { 3455 for_each_buffer_cpu(buffer, cpu) {
2741 cpu_buffer = buffer->buffers[cpu]; 3456 cpu_buffer = buffer->buffers[cpu];
2742 if (!rb_per_cpu_empty(cpu_buffer)) 3457 local_irq_save(flags);
3458 if (dolock)
3459 spin_lock(&cpu_buffer->reader_lock);
3460 ret = rb_per_cpu_empty(cpu_buffer);
3461 if (dolock)
3462 spin_unlock(&cpu_buffer->reader_lock);
3463 local_irq_restore(flags);
3464
3465 if (!ret)
2743 return 0; 3466 return 0;
2744 } 3467 }
2745 3468
@@ -2755,19 +3478,29 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
2755int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) 3478int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2756{ 3479{
2757 struct ring_buffer_per_cpu *cpu_buffer; 3480 struct ring_buffer_per_cpu *cpu_buffer;
3481 unsigned long flags;
3482 int dolock;
2758 int ret; 3483 int ret;
2759 3484
2760 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3485 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2761 return 1; 3486 return 1;
2762 3487
3488 dolock = rb_ok_to_lock();
3489
2763 cpu_buffer = buffer->buffers[cpu]; 3490 cpu_buffer = buffer->buffers[cpu];
3491 local_irq_save(flags);
3492 if (dolock)
3493 spin_lock(&cpu_buffer->reader_lock);
2764 ret = rb_per_cpu_empty(cpu_buffer); 3494 ret = rb_per_cpu_empty(cpu_buffer);
2765 3495 if (dolock)
3496 spin_unlock(&cpu_buffer->reader_lock);
3497 local_irq_restore(flags);
2766 3498
2767 return ret; 3499 return ret;
2768} 3500}
2769EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 3501EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
2770 3502
3503#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2771/** 3504/**
2772 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 3505 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2773 * @buffer_a: One buffer to swap with 3506 * @buffer_a: One buffer to swap with
@@ -2822,20 +3555,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2822 atomic_inc(&cpu_buffer_a->record_disabled); 3555 atomic_inc(&cpu_buffer_a->record_disabled);
2823 atomic_inc(&cpu_buffer_b->record_disabled); 3556 atomic_inc(&cpu_buffer_b->record_disabled);
2824 3557
3558 ret = -EBUSY;
3559 if (local_read(&cpu_buffer_a->committing))
3560 goto out_dec;
3561 if (local_read(&cpu_buffer_b->committing))
3562 goto out_dec;
3563
2825 buffer_a->buffers[cpu] = cpu_buffer_b; 3564 buffer_a->buffers[cpu] = cpu_buffer_b;
2826 buffer_b->buffers[cpu] = cpu_buffer_a; 3565 buffer_b->buffers[cpu] = cpu_buffer_a;
2827 3566
2828 cpu_buffer_b->buffer = buffer_a; 3567 cpu_buffer_b->buffer = buffer_a;
2829 cpu_buffer_a->buffer = buffer_b; 3568 cpu_buffer_a->buffer = buffer_b;
2830 3569
3570 ret = 0;
3571
3572out_dec:
2831 atomic_dec(&cpu_buffer_a->record_disabled); 3573 atomic_dec(&cpu_buffer_a->record_disabled);
2832 atomic_dec(&cpu_buffer_b->record_disabled); 3574 atomic_dec(&cpu_buffer_b->record_disabled);
2833
2834 ret = 0;
2835out: 3575out:
2836 return ret; 3576 return ret;
2837} 3577}
2838EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 3578EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3579#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
2839 3580
2840/** 3581/**
2841 * ring_buffer_alloc_read_page - allocate a page to read from buffer 3582 * ring_buffer_alloc_read_page - allocate a page to read from buffer
@@ -3008,7 +3749,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3008 read = 0; 3749 read = 0;
3009 } else { 3750 } else {
3010 /* update the entry counter */ 3751 /* update the entry counter */
3011 cpu_buffer->read += local_read(&reader->entries); 3752 cpu_buffer->read += rb_page_entries(reader);
3012 3753
3013 /* swap the pages */ 3754 /* swap the pages */
3014 rb_init_page(bpage); 3755 rb_init_page(bpage);
@@ -3029,6 +3770,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3029} 3770}
3030EXPORT_SYMBOL_GPL(ring_buffer_read_page); 3771EXPORT_SYMBOL_GPL(ring_buffer_read_page);
3031 3772
3773#ifdef CONFIG_TRACING
3032static ssize_t 3774static ssize_t
3033rb_simple_read(struct file *filp, char __user *ubuf, 3775rb_simple_read(struct file *filp, char __user *ubuf,
3034 size_t cnt, loff_t *ppos) 3776 size_t cnt, loff_t *ppos)
@@ -3096,6 +3838,7 @@ static __init int rb_init_debugfs(void)
3096} 3838}
3097 3839
3098fs_initcall(rb_init_debugfs); 3840fs_initcall(rb_init_debugfs);
3841#endif
3099 3842
3100#ifdef CONFIG_HOTPLUG_CPU 3843#ifdef CONFIG_HOTPLUG_CPU
3101static int rb_cpu_notify(struct notifier_block *self, 3844static int rb_cpu_notify(struct notifier_block *self,
@@ -3108,7 +3851,7 @@ static int rb_cpu_notify(struct notifier_block *self,
3108 switch (action) { 3851 switch (action) {
3109 case CPU_UP_PREPARE: 3852 case CPU_UP_PREPARE:
3110 case CPU_UP_PREPARE_FROZEN: 3853 case CPU_UP_PREPARE_FROZEN:
3111 if (cpu_isset(cpu, *buffer->cpumask)) 3854 if (cpumask_test_cpu(cpu, buffer->cpumask))
3112 return NOTIFY_OK; 3855 return NOTIFY_OK;
3113 3856
3114 buffer->buffers[cpu] = 3857 buffer->buffers[cpu] =
@@ -3119,7 +3862,7 @@ static int rb_cpu_notify(struct notifier_block *self,
3119 return NOTIFY_OK; 3862 return NOTIFY_OK;
3120 } 3863 }
3121 smp_wmb(); 3864 smp_wmb();
3122 cpu_set(cpu, *buffer->cpumask); 3865 cpumask_set_cpu(cpu, buffer->cpumask);
3123 break; 3866 break;
3124 case CPU_DOWN_PREPARE: 3867 case CPU_DOWN_PREPARE:
3125 case CPU_DOWN_PREPARE_FROZEN: 3868 case CPU_DOWN_PREPARE_FROZEN:
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 8d68e149a8b3..573d3cc762c3 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu)
102 event = (void *)&rpage->data[i]; 102 event = (void *)&rpage->data[i];
103 switch (event->type_len) { 103 switch (event->type_len) {
104 case RINGBUF_TYPE_PADDING: 104 case RINGBUF_TYPE_PADDING:
105 /* We don't expect any padding */ 105 /* failed writes may be discarded events */
106 KILL_TEST(); 106 if (!event->time_delta)
107 KILL_TEST();
108 inc = event->array[0] + 4;
107 break; 109 break;
108 case RINGBUF_TYPE_TIME_EXTEND: 110 case RINGBUF_TYPE_TIME_EXTEND:
109 inc = 8; 111 inc = 8;
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu)
119 KILL_TEST(); 121 KILL_TEST();
120 break; 122 break;
121 } 123 }
122 inc = event->array[0]; 124 inc = event->array[0] + 4;
123 break; 125 break;
124 default: 126 default:
125 entry = ring_buffer_event_data(event); 127 entry = ring_buffer_event_data(event);
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void)
201 * Hammer the buffer for 10 secs (this may 203 * Hammer the buffer for 10 secs (this may
202 * make the system stall) 204 * make the system stall)
203 */ 205 */
204 pr_info("Starting ring buffer hammer\n"); 206 trace_printk("Starting ring buffer hammer\n");
205 do_gettimeofday(&start_tv); 207 do_gettimeofday(&start_tv);
206 do { 208 do {
207 struct ring_buffer_event *event; 209 struct ring_buffer_event *event;
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void)
237#endif 239#endif
238 240
239 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); 241 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
240 pr_info("End ring buffer hammer\n"); 242 trace_printk("End ring buffer hammer\n");
241 243
242 if (consumer) { 244 if (consumer) {
243 /* Init both completions here to avoid races */ 245 /* Init both completions here to avoid races */
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void)
260 overruns = ring_buffer_overruns(buffer); 262 overruns = ring_buffer_overruns(buffer);
261 263
262 if (kill_test) 264 if (kill_test)
263 pr_info("ERROR!\n"); 265 trace_printk("ERROR!\n");
264 pr_info("Time: %lld (usecs)\n", time); 266 trace_printk("Time: %lld (usecs)\n", time);
265 pr_info("Overruns: %lld\n", overruns); 267 trace_printk("Overruns: %lld\n", overruns);
266 if (disable_reader) 268 if (disable_reader)
267 pr_info("Read: (reader disabled)\n"); 269 trace_printk("Read: (reader disabled)\n");
268 else 270 else
269 pr_info("Read: %ld (by %s)\n", read, 271 trace_printk("Read: %ld (by %s)\n", read,
270 read_events ? "events" : "pages"); 272 read_events ? "events" : "pages");
271 pr_info("Entries: %lld\n", entries); 273 trace_printk("Entries: %lld\n", entries);
272 pr_info("Total: %lld\n", entries + overruns + read); 274 trace_printk("Total: %lld\n", entries + overruns + read);
273 pr_info("Missed: %ld\n", missed); 275 trace_printk("Missed: %ld\n", missed);
274 pr_info("Hit: %ld\n", hit); 276 trace_printk("Hit: %ld\n", hit);
275 277
276 /* Convert time from usecs to millisecs */ 278 /* Convert time from usecs to millisecs */
277 do_div(time, USEC_PER_MSEC); 279 do_div(time, USEC_PER_MSEC);
278 if (time) 280 if (time)
279 hit /= (long)time; 281 hit /= (long)time;
280 else 282 else
281 pr_info("TIME IS ZERO??\n"); 283 trace_printk("TIME IS ZERO??\n");
282 284
283 pr_info("Entries per millisec: %ld\n", hit); 285 trace_printk("Entries per millisec: %ld\n", hit);
284 286
285 if (hit) { 287 if (hit) {
286 /* Calculate the average time in nanosecs */ 288 /* Calculate the average time in nanosecs */
287 avg = NSEC_PER_MSEC / hit; 289 avg = NSEC_PER_MSEC / hit;
288 pr_info("%ld ns per entry\n", avg); 290 trace_printk("%ld ns per entry\n", avg);
289 } 291 }
290 292
291 if (missed) { 293 if (missed) {
292 if (time) 294 if (time)
293 missed /= (long)time; 295 missed /= (long)time;
294 296
295 pr_info("Total iterations per millisec: %ld\n", hit + missed); 297 trace_printk("Total iterations per millisec: %ld\n",
298 hit + missed);
296 299
297 /* it is possible that hit + missed will overflow and be zero */ 300 /* it is possible that hit + missed will overflow and be zero */
298 if (!(hit + missed)) { 301 if (!(hit + missed)) {
299 pr_info("hit + missed overflowed and totalled zero!\n"); 302 trace_printk("hit + missed overflowed and totalled zero!\n");
300 hit--; /* make it non zero */ 303 hit--; /* make it non zero */
301 } 304 }
302 305
303 /* Calculate the average time in nanosecs */ 306 /* Calculate the average time in nanosecs */
304 avg = NSEC_PER_MSEC / (hit + missed); 307 avg = NSEC_PER_MSEC / (hit + missed);
305 pr_info("%ld ns per entry\n", avg); 308 trace_printk("%ld ns per entry\n", avg);
306 } 309 }
307} 310}
308 311
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg)
353 356
354 ring_buffer_producer(); 357 ring_buffer_producer();
355 358
356 pr_info("Sleeping for 10 secs\n"); 359 trace_printk("Sleeping for 10 secs\n");
357 set_current_state(TASK_INTERRUPTIBLE); 360 set_current_state(TASK_INTERRUPTIBLE);
358 schedule_timeout(HZ * SLEEP_TIME); 361 schedule_timeout(HZ * SLEEP_TIME);
359 __set_current_state(TASK_RUNNING); 362 __set_current_state(TASK_RUNNING);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1878bfb2e1e..5c75deeefe30 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/smp_lock.h>
20#include <linux/notifier.h> 21#include <linux/notifier.h>
21#include <linux/irqflags.h> 22#include <linux/irqflags.h>
22#include <linux/debugfs.h> 23#include <linux/debugfs.h>
@@ -42,14 +43,11 @@
42 43
43#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
44 45
45unsigned long __read_mostly tracing_max_latency;
46unsigned long __read_mostly tracing_thresh;
47
48/* 46/*
49 * On boot up, the ring buffer is set to the minimum size, so that 47 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing. 48 * we do not waste memory on systems that are not using tracing.
51 */ 49 */
52static int ring_buffer_expanded; 50int ring_buffer_expanded;
53 51
54/* 52/*
55 * We need to change this state when a selftest is running. 53 * We need to change this state when a selftest is running.
@@ -63,7 +61,7 @@ static bool __read_mostly tracing_selftest_running;
63/* 61/*
64 * If a tracer is running, we do not want to run SELFTEST. 62 * If a tracer is running, we do not want to run SELFTEST.
65 */ 63 */
66static bool __read_mostly tracing_selftest_disabled; 64bool __read_mostly tracing_selftest_disabled;
67 65
68/* For tracers that don't implement custom flags */ 66/* For tracers that don't implement custom flags */
69static struct tracer_opt dummy_tracer_opt[] = { 67static struct tracer_opt dummy_tracer_opt[] = {
@@ -88,7 +86,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
88 */ 86 */
89static int tracing_disabled = 1; 87static int tracing_disabled = 1;
90 88
91static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
92 90
93static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
94{ 92{
@@ -171,10 +169,11 @@ static struct trace_array global_trace;
171 169
172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 170static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
173 171
174int filter_current_check_discard(struct ftrace_event_call *call, void *rec, 172int filter_current_check_discard(struct ring_buffer *buffer,
173 struct ftrace_event_call *call, void *rec,
175 struct ring_buffer_event *event) 174 struct ring_buffer_event *event)
176{ 175{
177 return filter_check_discard(call, rec, global_trace.buffer, event); 176 return filter_check_discard(call, rec, buffer, event);
178} 177}
179EXPORT_SYMBOL_GPL(filter_current_check_discard); 178EXPORT_SYMBOL_GPL(filter_current_check_discard);
180 179
@@ -265,6 +264,9 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
265 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 264 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
266 TRACE_ITER_GRAPH_TIME; 265 TRACE_ITER_GRAPH_TIME;
267 266
267static int trace_stop_count;
268static DEFINE_SPINLOCK(tracing_start_lock);
269
268/** 270/**
269 * trace_wake_up - wake up tasks waiting for trace input 271 * trace_wake_up - wake up tasks waiting for trace input
270 * 272 *
@@ -284,13 +286,12 @@ void trace_wake_up(void)
284static int __init set_buf_size(char *str) 286static int __init set_buf_size(char *str)
285{ 287{
286 unsigned long buf_size; 288 unsigned long buf_size;
287 int ret;
288 289
289 if (!str) 290 if (!str)
290 return 0; 291 return 0;
291 ret = strict_strtoul(str, 0, &buf_size); 292 buf_size = memparse(str, &str);
292 /* nr_entries can not be zero */ 293 /* nr_entries can not be zero */
293 if (ret < 0 || buf_size == 0) 294 if (buf_size == 0)
294 return 0; 295 return 0;
295 trace_buf_size = buf_size; 296 trace_buf_size = buf_size;
296 return 1; 297 return 1;
@@ -323,50 +324,20 @@ static const char *trace_options[] = {
323 "printk-msg-only", 324 "printk-msg-only",
324 "context-info", 325 "context-info",
325 "latency-format", 326 "latency-format",
326 "global-clock",
327 "sleep-time", 327 "sleep-time",
328 "graph-time", 328 "graph-time",
329 NULL 329 NULL
330}; 330};
331 331
332/* 332static struct {
333 * ftrace_max_lock is used to protect the swapping of buffers 333 u64 (*func)(void);
334 * when taking a max snapshot. The buffers themselves are 334 const char *name;
335 * protected by per_cpu spinlocks. But the action of the swap 335} trace_clocks[] = {
336 * needs its own lock. 336 { trace_clock_local, "local" },
337 * 337 { trace_clock_global, "global" },
338 * This is defined as a raw_spinlock_t in order to help 338};
339 * with performance when lockdep debugging is enabled.
340 */
341static raw_spinlock_t ftrace_max_lock =
342 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
343
344/*
345 * Copy the new maximum trace into the separate maximum-trace
346 * structure. (this way the maximum trace is permanently saved,
347 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
348 */
349static void
350__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
351{
352 struct trace_array_cpu *data = tr->data[cpu];
353
354 max_tr.cpu = cpu;
355 max_tr.time_start = data->preempt_timestamp;
356
357 data = max_tr.data[cpu];
358 data->saved_latency = tracing_max_latency;
359
360 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
361 data->pid = tsk->pid;
362 data->uid = task_uid(tsk);
363 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
364 data->policy = tsk->policy;
365 data->rt_priority = tsk->rt_priority;
366 339
367 /* record this tasks comm */ 340int trace_clock_id;
368 tracing_record_cmdline(tsk);
369}
370 341
371ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 342ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
372{ 343{
@@ -411,6 +382,56 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
411 return cnt; 382 return cnt;
412} 383}
413 384
385/*
386 * ftrace_max_lock is used to protect the swapping of buffers
387 * when taking a max snapshot. The buffers themselves are
388 * protected by per_cpu spinlocks. But the action of the swap
389 * needs its own lock.
390 *
391 * This is defined as a raw_spinlock_t in order to help
392 * with performance when lockdep debugging is enabled.
393 *
394 * It is also used in other places outside the update_max_tr
395 * so it needs to be defined outside of the
396 * CONFIG_TRACER_MAX_TRACE.
397 */
398static raw_spinlock_t ftrace_max_lock =
399 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
400
401#ifdef CONFIG_TRACER_MAX_TRACE
402unsigned long __read_mostly tracing_max_latency;
403unsigned long __read_mostly tracing_thresh;
404
405/*
406 * Copy the new maximum trace into the separate maximum-trace
407 * structure. (this way the maximum trace is permanently saved,
408 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
409 */
410static void
411__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
412{
413 struct trace_array_cpu *data = tr->data[cpu];
414 struct trace_array_cpu *max_data = tr->data[cpu];
415
416 max_tr.cpu = cpu;
417 max_tr.time_start = data->preempt_timestamp;
418
419 max_data = max_tr.data[cpu];
420 max_data->saved_latency = tracing_max_latency;
421 max_data->critical_start = data->critical_start;
422 max_data->critical_end = data->critical_end;
423
424 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
425 max_data->pid = tsk->pid;
426 max_data->uid = task_uid(tsk);
427 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
428 max_data->policy = tsk->policy;
429 max_data->rt_priority = tsk->rt_priority;
430
431 /* record this tasks comm */
432 tracing_record_cmdline(tsk);
433}
434
414/** 435/**
415 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 436 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
416 * @tr: tracer 437 * @tr: tracer
@@ -425,16 +446,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
425{ 446{
426 struct ring_buffer *buf = tr->buffer; 447 struct ring_buffer *buf = tr->buffer;
427 448
449 if (trace_stop_count)
450 return;
451
428 WARN_ON_ONCE(!irqs_disabled()); 452 WARN_ON_ONCE(!irqs_disabled());
429 __raw_spin_lock(&ftrace_max_lock); 453 __raw_spin_lock(&ftrace_max_lock);
430 454
431 tr->buffer = max_tr.buffer; 455 tr->buffer = max_tr.buffer;
432 max_tr.buffer = buf; 456 max_tr.buffer = buf;
433 457
434 ftrace_disable_cpu();
435 ring_buffer_reset(tr->buffer);
436 ftrace_enable_cpu();
437
438 __update_max_tr(tr, tsk, cpu); 458 __update_max_tr(tr, tsk, cpu);
439 __raw_spin_unlock(&ftrace_max_lock); 459 __raw_spin_unlock(&ftrace_max_lock);
440} 460}
@@ -452,21 +472,35 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
452{ 472{
453 int ret; 473 int ret;
454 474
475 if (trace_stop_count)
476 return;
477
455 WARN_ON_ONCE(!irqs_disabled()); 478 WARN_ON_ONCE(!irqs_disabled());
456 __raw_spin_lock(&ftrace_max_lock); 479 __raw_spin_lock(&ftrace_max_lock);
457 480
458 ftrace_disable_cpu(); 481 ftrace_disable_cpu();
459 482
460 ring_buffer_reset(max_tr.buffer);
461 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 483 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
462 484
485 if (ret == -EBUSY) {
486 /*
487 * We failed to swap the buffer due to a commit taking
488 * place on this CPU. We fail to record, but we reset
489 * the max trace buffer (no one writes directly to it)
490 * and flag that it failed.
491 */
492 trace_array_printk(&max_tr, _THIS_IP_,
493 "Failed to swap buffers due to commit in progress\n");
494 }
495
463 ftrace_enable_cpu(); 496 ftrace_enable_cpu();
464 497
465 WARN_ON_ONCE(ret && ret != -EAGAIN); 498 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
466 499
467 __update_max_tr(tr, tsk, cpu); 500 __update_max_tr(tr, tsk, cpu);
468 __raw_spin_unlock(&ftrace_max_lock); 501 __raw_spin_unlock(&ftrace_max_lock);
469} 502}
503#endif /* CONFIG_TRACER_MAX_TRACE */
470 504
471/** 505/**
472 * register_tracer - register a tracer with the ftrace system. 506 * register_tracer - register a tracer with the ftrace system.
@@ -523,7 +557,6 @@ __acquires(kernel_lock)
523 if (type->selftest && !tracing_selftest_disabled) { 557 if (type->selftest && !tracing_selftest_disabled) {
524 struct tracer *saved_tracer = current_trace; 558 struct tracer *saved_tracer = current_trace;
525 struct trace_array *tr = &global_trace; 559 struct trace_array *tr = &global_trace;
526 int i;
527 560
528 /* 561 /*
529 * Run a selftest on this tracer. 562 * Run a selftest on this tracer.
@@ -532,8 +565,7 @@ __acquires(kernel_lock)
532 * internal tracing to verify that everything is in order. 565 * internal tracing to verify that everything is in order.
533 * If we fail, we do not register this tracer. 566 * If we fail, we do not register this tracer.
534 */ 567 */
535 for_each_tracing_cpu(i) 568 tracing_reset_online_cpus(tr);
536 tracing_reset(tr, i);
537 569
538 current_trace = type; 570 current_trace = type;
539 /* the test is responsible for initializing and enabling */ 571 /* the test is responsible for initializing and enabling */
@@ -546,8 +578,7 @@ __acquires(kernel_lock)
546 goto out; 578 goto out;
547 } 579 }
548 /* Only reset on passing, to avoid touching corrupted buffers */ 580 /* Only reset on passing, to avoid touching corrupted buffers */
549 for_each_tracing_cpu(i) 581 tracing_reset_online_cpus(tr);
550 tracing_reset(tr, i);
551 582
552 printk(KERN_CONT "PASSED\n"); 583 printk(KERN_CONT "PASSED\n");
553 } 584 }
@@ -622,21 +653,42 @@ void unregister_tracer(struct tracer *type)
622 mutex_unlock(&trace_types_lock); 653 mutex_unlock(&trace_types_lock);
623} 654}
624 655
625void tracing_reset(struct trace_array *tr, int cpu) 656static void __tracing_reset(struct trace_array *tr, int cpu)
626{ 657{
627 ftrace_disable_cpu(); 658 ftrace_disable_cpu();
628 ring_buffer_reset_cpu(tr->buffer, cpu); 659 ring_buffer_reset_cpu(tr->buffer, cpu);
629 ftrace_enable_cpu(); 660 ftrace_enable_cpu();
630} 661}
631 662
663void tracing_reset(struct trace_array *tr, int cpu)
664{
665 struct ring_buffer *buffer = tr->buffer;
666
667 ring_buffer_record_disable(buffer);
668
669 /* Make sure all commits have finished */
670 synchronize_sched();
671 __tracing_reset(tr, cpu);
672
673 ring_buffer_record_enable(buffer);
674}
675
632void tracing_reset_online_cpus(struct trace_array *tr) 676void tracing_reset_online_cpus(struct trace_array *tr)
633{ 677{
678 struct ring_buffer *buffer = tr->buffer;
634 int cpu; 679 int cpu;
635 680
681 ring_buffer_record_disable(buffer);
682
683 /* Make sure all commits have finished */
684 synchronize_sched();
685
636 tr->time_start = ftrace_now(tr->cpu); 686 tr->time_start = ftrace_now(tr->cpu);
637 687
638 for_each_online_cpu(cpu) 688 for_each_online_cpu(cpu)
639 tracing_reset(tr, cpu); 689 __tracing_reset(tr, cpu);
690
691 ring_buffer_record_enable(buffer);
640} 692}
641 693
642void tracing_reset_current(int cpu) 694void tracing_reset_current(int cpu)
@@ -667,9 +719,6 @@ static void trace_init_cmdlines(void)
667 cmdline_idx = 0; 719 cmdline_idx = 0;
668} 720}
669 721
670static int trace_stop_count;
671static DEFINE_SPINLOCK(tracing_start_lock);
672
673/** 722/**
674 * ftrace_off_permanent - disable all ftrace code permanently 723 * ftrace_off_permanent - disable all ftrace code permanently
675 * 724 *
@@ -848,15 +897,17 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
848 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 897 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
849 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 898 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
850} 899}
900EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
851 901
852struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 902struct ring_buffer_event *
853 int type, 903trace_buffer_lock_reserve(struct ring_buffer *buffer,
854 unsigned long len, 904 int type,
855 unsigned long flags, int pc) 905 unsigned long len,
906 unsigned long flags, int pc)
856{ 907{
857 struct ring_buffer_event *event; 908 struct ring_buffer_event *event;
858 909
859 event = ring_buffer_lock_reserve(tr->buffer, len); 910 event = ring_buffer_lock_reserve(buffer, len);
860 if (event != NULL) { 911 if (event != NULL) {
861 struct trace_entry *ent = ring_buffer_event_data(event); 912 struct trace_entry *ent = ring_buffer_event_data(event);
862 913
@@ -866,58 +917,60 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
866 917
867 return event; 918 return event;
868} 919}
869static void ftrace_trace_stack(struct trace_array *tr,
870 unsigned long flags, int skip, int pc);
871static void ftrace_trace_userstack(struct trace_array *tr,
872 unsigned long flags, int pc);
873 920
874static inline void __trace_buffer_unlock_commit(struct trace_array *tr, 921static inline void
875 struct ring_buffer_event *event, 922__trace_buffer_unlock_commit(struct ring_buffer *buffer,
876 unsigned long flags, int pc, 923 struct ring_buffer_event *event,
877 int wake) 924 unsigned long flags, int pc,
925 int wake)
878{ 926{
879 ring_buffer_unlock_commit(tr->buffer, event); 927 ring_buffer_unlock_commit(buffer, event);
880 928
881 ftrace_trace_stack(tr, flags, 6, pc); 929 ftrace_trace_stack(buffer, flags, 6, pc);
882 ftrace_trace_userstack(tr, flags, pc); 930 ftrace_trace_userstack(buffer, flags, pc);
883 931
884 if (wake) 932 if (wake)
885 trace_wake_up(); 933 trace_wake_up();
886} 934}
887 935
888void trace_buffer_unlock_commit(struct trace_array *tr, 936void trace_buffer_unlock_commit(struct ring_buffer *buffer,
889 struct ring_buffer_event *event, 937 struct ring_buffer_event *event,
890 unsigned long flags, int pc) 938 unsigned long flags, int pc)
891{ 939{
892 __trace_buffer_unlock_commit(tr, event, flags, pc, 1); 940 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
893} 941}
894 942
895struct ring_buffer_event * 943struct ring_buffer_event *
896trace_current_buffer_lock_reserve(int type, unsigned long len, 944trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
945 int type, unsigned long len,
897 unsigned long flags, int pc) 946 unsigned long flags, int pc)
898{ 947{
899 return trace_buffer_lock_reserve(&global_trace, 948 *current_rb = global_trace.buffer;
949 return trace_buffer_lock_reserve(*current_rb,
900 type, len, flags, pc); 950 type, len, flags, pc);
901} 951}
902EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); 952EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
903 953
904void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 954void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
955 struct ring_buffer_event *event,
905 unsigned long flags, int pc) 956 unsigned long flags, int pc)
906{ 957{
907 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); 958 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
908} 959}
909EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); 960EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
910 961
911void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, 962void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
912 unsigned long flags, int pc) 963 struct ring_buffer_event *event,
964 unsigned long flags, int pc)
913{ 965{
914 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); 966 __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
915} 967}
916EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 968EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
917 969
918void trace_current_buffer_discard_commit(struct ring_buffer_event *event) 970void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
971 struct ring_buffer_event *event)
919{ 972{
920 ring_buffer_discard_commit(global_trace.buffer, event); 973 ring_buffer_discard_commit(buffer, event);
921} 974}
922EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); 975EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
923 976
@@ -927,6 +980,7 @@ trace_function(struct trace_array *tr,
927 int pc) 980 int pc)
928{ 981{
929 struct ftrace_event_call *call = &event_function; 982 struct ftrace_event_call *call = &event_function;
983 struct ring_buffer *buffer = tr->buffer;
930 struct ring_buffer_event *event; 984 struct ring_buffer_event *event;
931 struct ftrace_entry *entry; 985 struct ftrace_entry *entry;
932 986
@@ -934,7 +988,7 @@ trace_function(struct trace_array *tr,
934 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 988 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
935 return; 989 return;
936 990
937 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), 991 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
938 flags, pc); 992 flags, pc);
939 if (!event) 993 if (!event)
940 return; 994 return;
@@ -942,57 +996,9 @@ trace_function(struct trace_array *tr,
942 entry->ip = ip; 996 entry->ip = ip;
943 entry->parent_ip = parent_ip; 997 entry->parent_ip = parent_ip;
944 998
945 if (!filter_check_discard(call, entry, tr->buffer, event)) 999 if (!filter_check_discard(call, entry, buffer, event))
946 ring_buffer_unlock_commit(tr->buffer, event); 1000 ring_buffer_unlock_commit(buffer, event);
947}
948
949#ifdef CONFIG_FUNCTION_GRAPH_TRACER
950static int __trace_graph_entry(struct trace_array *tr,
951 struct ftrace_graph_ent *trace,
952 unsigned long flags,
953 int pc)
954{
955 struct ftrace_event_call *call = &event_funcgraph_entry;
956 struct ring_buffer_event *event;
957 struct ftrace_graph_ent_entry *entry;
958
959 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
960 return 0;
961
962 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
963 sizeof(*entry), flags, pc);
964 if (!event)
965 return 0;
966 entry = ring_buffer_event_data(event);
967 entry->graph_ent = *trace;
968 if (!filter_current_check_discard(call, entry, event))
969 ring_buffer_unlock_commit(global_trace.buffer, event);
970
971 return 1;
972}
973
974static void __trace_graph_return(struct trace_array *tr,
975 struct ftrace_graph_ret *trace,
976 unsigned long flags,
977 int pc)
978{
979 struct ftrace_event_call *call = &event_funcgraph_exit;
980 struct ring_buffer_event *event;
981 struct ftrace_graph_ret_entry *entry;
982
983 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
984 return;
985
986 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
987 sizeof(*entry), flags, pc);
988 if (!event)
989 return;
990 entry = ring_buffer_event_data(event);
991 entry->ret = *trace;
992 if (!filter_current_check_discard(call, entry, event))
993 ring_buffer_unlock_commit(global_trace.buffer, event);
994} 1001}
995#endif
996 1002
997void 1003void
998ftrace(struct trace_array *tr, struct trace_array_cpu *data, 1004ftrace(struct trace_array *tr, struct trace_array_cpu *data,
@@ -1003,17 +1009,17 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1003 trace_function(tr, ip, parent_ip, flags, pc); 1009 trace_function(tr, ip, parent_ip, flags, pc);
1004} 1010}
1005 1011
1006static void __ftrace_trace_stack(struct trace_array *tr, 1012#ifdef CONFIG_STACKTRACE
1013static void __ftrace_trace_stack(struct ring_buffer *buffer,
1007 unsigned long flags, 1014 unsigned long flags,
1008 int skip, int pc) 1015 int skip, int pc)
1009{ 1016{
1010#ifdef CONFIG_STACKTRACE
1011 struct ftrace_event_call *call = &event_kernel_stack; 1017 struct ftrace_event_call *call = &event_kernel_stack;
1012 struct ring_buffer_event *event; 1018 struct ring_buffer_event *event;
1013 struct stack_entry *entry; 1019 struct stack_entry *entry;
1014 struct stack_trace trace; 1020 struct stack_trace trace;
1015 1021
1016 event = trace_buffer_lock_reserve(tr, TRACE_STACK, 1022 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1017 sizeof(*entry), flags, pc); 1023 sizeof(*entry), flags, pc);
1018 if (!event) 1024 if (!event)
1019 return; 1025 return;
@@ -1026,32 +1032,28 @@ static void __ftrace_trace_stack(struct trace_array *tr,
1026 trace.entries = entry->caller; 1032 trace.entries = entry->caller;
1027 1033
1028 save_stack_trace(&trace); 1034 save_stack_trace(&trace);
1029 if (!filter_check_discard(call, entry, tr->buffer, event)) 1035 if (!filter_check_discard(call, entry, buffer, event))
1030 ring_buffer_unlock_commit(tr->buffer, event); 1036 ring_buffer_unlock_commit(buffer, event);
1031#endif
1032} 1037}
1033 1038
1034static void ftrace_trace_stack(struct trace_array *tr, 1039void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1035 unsigned long flags, 1040 int skip, int pc)
1036 int skip, int pc)
1037{ 1041{
1038 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1042 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1039 return; 1043 return;
1040 1044
1041 __ftrace_trace_stack(tr, flags, skip, pc); 1045 __ftrace_trace_stack(buffer, flags, skip, pc);
1042} 1046}
1043 1047
1044void __trace_stack(struct trace_array *tr, 1048void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1045 unsigned long flags, 1049 int pc)
1046 int skip, int pc)
1047{ 1050{
1048 __ftrace_trace_stack(tr, flags, skip, pc); 1051 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1049} 1052}
1050 1053
1051static void ftrace_trace_userstack(struct trace_array *tr, 1054void
1052 unsigned long flags, int pc) 1055ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1053{ 1056{
1054#ifdef CONFIG_STACKTRACE
1055 struct ftrace_event_call *call = &event_user_stack; 1057 struct ftrace_event_call *call = &event_user_stack;
1056 struct ring_buffer_event *event; 1058 struct ring_buffer_event *event;
1057 struct userstack_entry *entry; 1059 struct userstack_entry *entry;
@@ -1060,7 +1062,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1060 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1062 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1061 return; 1063 return;
1062 1064
1063 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, 1065 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1064 sizeof(*entry), flags, pc); 1066 sizeof(*entry), flags, pc);
1065 if (!event) 1067 if (!event)
1066 return; 1068 return;
@@ -1074,9 +1076,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1074 trace.entries = entry->caller; 1076 trace.entries = entry->caller;
1075 1077
1076 save_stack_trace_user(&trace); 1078 save_stack_trace_user(&trace);
1077 if (!filter_check_discard(call, entry, tr->buffer, event)) 1079 if (!filter_check_discard(call, entry, buffer, event))
1078 ring_buffer_unlock_commit(tr->buffer, event); 1080 ring_buffer_unlock_commit(buffer, event);
1079#endif
1080} 1081}
1081 1082
1082#ifdef UNUSED 1083#ifdef UNUSED
@@ -1086,6 +1087,8 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1086} 1087}
1087#endif /* UNUSED */ 1088#endif /* UNUSED */
1088 1089
1090#endif /* CONFIG_STACKTRACE */
1091
1089static void 1092static void
1090ftrace_trace_special(void *__tr, 1093ftrace_trace_special(void *__tr,
1091 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1094 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -1093,9 +1096,10 @@ ftrace_trace_special(void *__tr,
1093{ 1096{
1094 struct ring_buffer_event *event; 1097 struct ring_buffer_event *event;
1095 struct trace_array *tr = __tr; 1098 struct trace_array *tr = __tr;
1099 struct ring_buffer *buffer = tr->buffer;
1096 struct special_entry *entry; 1100 struct special_entry *entry;
1097 1101
1098 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, 1102 event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1099 sizeof(*entry), 0, pc); 1103 sizeof(*entry), 0, pc);
1100 if (!event) 1104 if (!event)
1101 return; 1105 return;
@@ -1103,7 +1107,7 @@ ftrace_trace_special(void *__tr,
1103 entry->arg1 = arg1; 1107 entry->arg1 = arg1;
1104 entry->arg2 = arg2; 1108 entry->arg2 = arg2;
1105 entry->arg3 = arg3; 1109 entry->arg3 = arg3;
1106 trace_buffer_unlock_commit(tr, event, 0, pc); 1110 trace_buffer_unlock_commit(buffer, event, 0, pc);
1107} 1111}
1108 1112
1109void 1113void
@@ -1114,62 +1118,6 @@ __trace_special(void *__tr, void *__data,
1114} 1118}
1115 1119
1116void 1120void
1117tracing_sched_switch_trace(struct trace_array *tr,
1118 struct task_struct *prev,
1119 struct task_struct *next,
1120 unsigned long flags, int pc)
1121{
1122 struct ftrace_event_call *call = &event_context_switch;
1123 struct ring_buffer_event *event;
1124 struct ctx_switch_entry *entry;
1125
1126 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1127 sizeof(*entry), flags, pc);
1128 if (!event)
1129 return;
1130 entry = ring_buffer_event_data(event);
1131 entry->prev_pid = prev->pid;
1132 entry->prev_prio = prev->prio;
1133 entry->prev_state = prev->state;
1134 entry->next_pid = next->pid;
1135 entry->next_prio = next->prio;
1136 entry->next_state = next->state;
1137 entry->next_cpu = task_cpu(next);
1138
1139 if (!filter_check_discard(call, entry, tr->buffer, event))
1140 trace_buffer_unlock_commit(tr, event, flags, pc);
1141}
1142
1143void
1144tracing_sched_wakeup_trace(struct trace_array *tr,
1145 struct task_struct *wakee,
1146 struct task_struct *curr,
1147 unsigned long flags, int pc)
1148{
1149 struct ftrace_event_call *call = &event_wakeup;
1150 struct ring_buffer_event *event;
1151 struct ctx_switch_entry *entry;
1152
1153 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1154 sizeof(*entry), flags, pc);
1155 if (!event)
1156 return;
1157 entry = ring_buffer_event_data(event);
1158 entry->prev_pid = curr->pid;
1159 entry->prev_prio = curr->prio;
1160 entry->prev_state = curr->state;
1161 entry->next_pid = wakee->pid;
1162 entry->next_prio = wakee->prio;
1163 entry->next_state = wakee->state;
1164 entry->next_cpu = task_cpu(wakee);
1165
1166 if (!filter_check_discard(call, entry, tr->buffer, event))
1167 ring_buffer_unlock_commit(tr->buffer, event);
1168 ftrace_trace_stack(tr, flags, 6, pc);
1169 ftrace_trace_userstack(tr, flags, pc);
1170}
1171
1172void
1173ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) 1121ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1174{ 1122{
1175 struct trace_array *tr = &global_trace; 1123 struct trace_array *tr = &global_trace;
@@ -1193,68 +1141,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1193 local_irq_restore(flags); 1141 local_irq_restore(flags);
1194} 1142}
1195 1143
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197int trace_graph_entry(struct ftrace_graph_ent *trace)
1198{
1199 struct trace_array *tr = &global_trace;
1200 struct trace_array_cpu *data;
1201 unsigned long flags;
1202 long disabled;
1203 int ret;
1204 int cpu;
1205 int pc;
1206
1207 if (!ftrace_trace_task(current))
1208 return 0;
1209
1210 if (!ftrace_graph_addr(trace->func))
1211 return 0;
1212
1213 local_irq_save(flags);
1214 cpu = raw_smp_processor_id();
1215 data = tr->data[cpu];
1216 disabled = atomic_inc_return(&data->disabled);
1217 if (likely(disabled == 1)) {
1218 pc = preempt_count();
1219 ret = __trace_graph_entry(tr, trace, flags, pc);
1220 } else {
1221 ret = 0;
1222 }
1223 /* Only do the atomic if it is not already set */
1224 if (!test_tsk_trace_graph(current))
1225 set_tsk_trace_graph(current);
1226
1227 atomic_dec(&data->disabled);
1228 local_irq_restore(flags);
1229
1230 return ret;
1231}
1232
1233void trace_graph_return(struct ftrace_graph_ret *trace)
1234{
1235 struct trace_array *tr = &global_trace;
1236 struct trace_array_cpu *data;
1237 unsigned long flags;
1238 long disabled;
1239 int cpu;
1240 int pc;
1241
1242 local_irq_save(flags);
1243 cpu = raw_smp_processor_id();
1244 data = tr->data[cpu];
1245 disabled = atomic_inc_return(&data->disabled);
1246 if (likely(disabled == 1)) {
1247 pc = preempt_count();
1248 __trace_graph_return(tr, trace, flags, pc);
1249 }
1250 if (!trace->depth)
1251 clear_tsk_trace_graph(current);
1252 atomic_dec(&data->disabled);
1253 local_irq_restore(flags);
1254}
1255#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1256
1257
1258/** 1144/**
1259 * trace_vbprintk - write binary msg to tracing buffer 1145 * trace_vbprintk - write binary msg to tracing buffer
1260 * 1146 *
@@ -1267,6 +1153,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1267 1153
1268 struct ftrace_event_call *call = &event_bprint; 1154 struct ftrace_event_call *call = &event_bprint;
1269 struct ring_buffer_event *event; 1155 struct ring_buffer_event *event;
1156 struct ring_buffer *buffer;
1270 struct trace_array *tr = &global_trace; 1157 struct trace_array *tr = &global_trace;
1271 struct trace_array_cpu *data; 1158 struct trace_array_cpu *data;
1272 struct bprint_entry *entry; 1159 struct bprint_entry *entry;
@@ -1299,7 +1186,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1299 goto out_unlock; 1186 goto out_unlock;
1300 1187
1301 size = sizeof(*entry) + sizeof(u32) * len; 1188 size = sizeof(*entry) + sizeof(u32) * len;
1302 event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); 1189 buffer = tr->buffer;
1190 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1191 flags, pc);
1303 if (!event) 1192 if (!event)
1304 goto out_unlock; 1193 goto out_unlock;
1305 entry = ring_buffer_event_data(event); 1194 entry = ring_buffer_event_data(event);
@@ -1307,8 +1196,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1307 entry->fmt = fmt; 1196 entry->fmt = fmt;
1308 1197
1309 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1198 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1310 if (!filter_check_discard(call, entry, tr->buffer, event)) 1199 if (!filter_check_discard(call, entry, buffer, event))
1311 ring_buffer_unlock_commit(tr->buffer, event); 1200 ring_buffer_unlock_commit(buffer, event);
1312 1201
1313out_unlock: 1202out_unlock:
1314 __raw_spin_unlock(&trace_buf_lock); 1203 __raw_spin_unlock(&trace_buf_lock);
@@ -1323,14 +1212,30 @@ out:
1323} 1212}
1324EXPORT_SYMBOL_GPL(trace_vbprintk); 1213EXPORT_SYMBOL_GPL(trace_vbprintk);
1325 1214
1326int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 1215int trace_array_printk(struct trace_array *tr,
1216 unsigned long ip, const char *fmt, ...)
1217{
1218 int ret;
1219 va_list ap;
1220
1221 if (!(trace_flags & TRACE_ITER_PRINTK))
1222 return 0;
1223
1224 va_start(ap, fmt);
1225 ret = trace_array_vprintk(tr, ip, fmt, ap);
1226 va_end(ap);
1227 return ret;
1228}
1229
1230int trace_array_vprintk(struct trace_array *tr,
1231 unsigned long ip, const char *fmt, va_list args)
1327{ 1232{
1328 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1233 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1329 static char trace_buf[TRACE_BUF_SIZE]; 1234 static char trace_buf[TRACE_BUF_SIZE];
1330 1235
1331 struct ftrace_event_call *call = &event_print; 1236 struct ftrace_event_call *call = &event_print;
1332 struct ring_buffer_event *event; 1237 struct ring_buffer_event *event;
1333 struct trace_array *tr = &global_trace; 1238 struct ring_buffer *buffer;
1334 struct trace_array_cpu *data; 1239 struct trace_array_cpu *data;
1335 int cpu, len = 0, size, pc; 1240 int cpu, len = 0, size, pc;
1336 struct print_entry *entry; 1241 struct print_entry *entry;
@@ -1358,7 +1263,9 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1358 trace_buf[len] = 0; 1263 trace_buf[len] = 0;
1359 1264
1360 size = sizeof(*entry) + len + 1; 1265 size = sizeof(*entry) + len + 1;
1361 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); 1266 buffer = tr->buffer;
1267 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1268 irq_flags, pc);
1362 if (!event) 1269 if (!event)
1363 goto out_unlock; 1270 goto out_unlock;
1364 entry = ring_buffer_event_data(event); 1271 entry = ring_buffer_event_data(event);
@@ -1366,8 +1273,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1366 1273
1367 memcpy(&entry->buf, trace_buf, len); 1274 memcpy(&entry->buf, trace_buf, len);
1368 entry->buf[len] = 0; 1275 entry->buf[len] = 0;
1369 if (!filter_check_discard(call, entry, tr->buffer, event)) 1276 if (!filter_check_discard(call, entry, buffer, event))
1370 ring_buffer_unlock_commit(tr->buffer, event); 1277 ring_buffer_unlock_commit(buffer, event);
1371 1278
1372 out_unlock: 1279 out_unlock:
1373 __raw_spin_unlock(&trace_buf_lock); 1280 __raw_spin_unlock(&trace_buf_lock);
@@ -1379,6 +1286,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1379 1286
1380 return len; 1287 return len;
1381} 1288}
1289
1290int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1291{
1292 return trace_array_printk(&global_trace, ip, fmt, args);
1293}
1382EXPORT_SYMBOL_GPL(trace_vprintk); 1294EXPORT_SYMBOL_GPL(trace_vprintk);
1383 1295
1384enum trace_file_type { 1296enum trace_file_type {
@@ -1518,6 +1430,37 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1518 return ent; 1430 return ent;
1519} 1431}
1520 1432
1433static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1434{
1435 struct trace_array *tr = iter->tr;
1436 struct ring_buffer_event *event;
1437 struct ring_buffer_iter *buf_iter;
1438 unsigned long entries = 0;
1439 u64 ts;
1440
1441 tr->data[cpu]->skipped_entries = 0;
1442
1443 if (!iter->buffer_iter[cpu])
1444 return;
1445
1446 buf_iter = iter->buffer_iter[cpu];
1447 ring_buffer_iter_reset(buf_iter);
1448
1449 /*
1450 * We could have the case with the max latency tracers
1451 * that a reset never took place on a cpu. This is evident
1452 * by the timestamp being before the start of the buffer.
1453 */
1454 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1455 if (ts >= iter->tr->time_start)
1456 break;
1457 entries++;
1458 ring_buffer_read(buf_iter, NULL);
1459 }
1460
1461 tr->data[cpu]->skipped_entries = entries;
1462}
1463
1521/* 1464/*
1522 * No necessary locking here. The worst thing which can 1465 * No necessary locking here. The worst thing which can
1523 * happen is loosing events consumed at the same time 1466 * happen is loosing events consumed at the same time
@@ -1556,10 +1499,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1556 1499
1557 if (cpu_file == TRACE_PIPE_ALL_CPU) { 1500 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1558 for_each_tracing_cpu(cpu) 1501 for_each_tracing_cpu(cpu)
1559 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1502 tracing_iter_reset(iter, cpu);
1560 } else 1503 } else
1561 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); 1504 tracing_iter_reset(iter, cpu_file);
1562
1563 1505
1564 ftrace_enable_cpu(); 1506 ftrace_enable_cpu();
1565 1507
@@ -1608,16 +1550,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1608 struct trace_array *tr = iter->tr; 1550 struct trace_array *tr = iter->tr;
1609 struct trace_array_cpu *data = tr->data[tr->cpu]; 1551 struct trace_array_cpu *data = tr->data[tr->cpu];
1610 struct tracer *type = current_trace; 1552 struct tracer *type = current_trace;
1611 unsigned long total; 1553 unsigned long entries = 0;
1612 unsigned long entries; 1554 unsigned long total = 0;
1555 unsigned long count;
1613 const char *name = "preemption"; 1556 const char *name = "preemption";
1557 int cpu;
1614 1558
1615 if (type) 1559 if (type)
1616 name = type->name; 1560 name = type->name;
1617 1561
1618 entries = ring_buffer_entries(iter->tr->buffer); 1562
1619 total = entries + 1563 for_each_tracing_cpu(cpu) {
1620 ring_buffer_overruns(iter->tr->buffer); 1564 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1565 /*
1566 * If this buffer has skipped entries, then we hold all
1567 * entries for the trace and we need to ignore the
1568 * ones before the time stamp.
1569 */
1570 if (tr->data[cpu]->skipped_entries) {
1571 count -= tr->data[cpu]->skipped_entries;
1572 /* total is the same as the entries */
1573 total += count;
1574 } else
1575 total += count +
1576 ring_buffer_overrun_cpu(tr->buffer, cpu);
1577 entries += count;
1578 }
1621 1579
1622 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1580 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1623 name, UTS_RELEASE); 1581 name, UTS_RELEASE);
@@ -1659,7 +1617,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1659 seq_puts(m, "\n# => ended at: "); 1617 seq_puts(m, "\n# => ended at: ");
1660 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 1618 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1661 trace_print_seq(m, &iter->seq); 1619 trace_print_seq(m, &iter->seq);
1662 seq_puts(m, "#\n"); 1620 seq_puts(m, "\n#\n");
1663 } 1621 }
1664 1622
1665 seq_puts(m, "#\n"); 1623 seq_puts(m, "#\n");
@@ -1678,6 +1636,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1678 if (cpumask_test_cpu(iter->cpu, iter->started)) 1636 if (cpumask_test_cpu(iter->cpu, iter->started))
1679 return; 1637 return;
1680 1638
1639 if (iter->tr->data[iter->cpu]->skipped_entries)
1640 return;
1641
1681 cpumask_set_cpu(iter->cpu, iter->started); 1642 cpumask_set_cpu(iter->cpu, iter->started);
1682 1643
1683 /* Don't print started cpu buffer for the first entry of the trace */ 1644 /* Don't print started cpu buffer for the first entry of the trace */
@@ -1940,19 +1901,23 @@ __tracing_open(struct inode *inode, struct file *file)
1940 if (ring_buffer_overruns(iter->tr->buffer)) 1901 if (ring_buffer_overruns(iter->tr->buffer))
1941 iter->iter_flags |= TRACE_FILE_ANNOTATE; 1902 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1942 1903
1904 /* stop the trace while dumping */
1905 tracing_stop();
1906
1943 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 1907 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1944 for_each_tracing_cpu(cpu) { 1908 for_each_tracing_cpu(cpu) {
1945 1909
1946 iter->buffer_iter[cpu] = 1910 iter->buffer_iter[cpu] =
1947 ring_buffer_read_start(iter->tr->buffer, cpu); 1911 ring_buffer_read_start(iter->tr->buffer, cpu);
1912 tracing_iter_reset(iter, cpu);
1948 } 1913 }
1949 } else { 1914 } else {
1950 cpu = iter->cpu_file; 1915 cpu = iter->cpu_file;
1951 iter->buffer_iter[cpu] = 1916 iter->buffer_iter[cpu] =
1952 ring_buffer_read_start(iter->tr->buffer, cpu); 1917 ring_buffer_read_start(iter->tr->buffer, cpu);
1918 tracing_iter_reset(iter, cpu);
1953 } 1919 }
1954 1920
1955 /* TODO stop tracer */
1956 ret = seq_open(file, &tracer_seq_ops); 1921 ret = seq_open(file, &tracer_seq_ops);
1957 if (ret < 0) { 1922 if (ret < 0) {
1958 fail_ret = ERR_PTR(ret); 1923 fail_ret = ERR_PTR(ret);
@@ -1962,9 +1927,6 @@ __tracing_open(struct inode *inode, struct file *file)
1962 m = file->private_data; 1927 m = file->private_data;
1963 m->private = iter; 1928 m->private = iter;
1964 1929
1965 /* stop the trace while dumping */
1966 tracing_stop();
1967
1968 mutex_unlock(&trace_types_lock); 1930 mutex_unlock(&trace_types_lock);
1969 1931
1970 return iter; 1932 return iter;
@@ -1975,6 +1937,7 @@ __tracing_open(struct inode *inode, struct file *file)
1975 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1937 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1976 } 1938 }
1977 free_cpumask_var(iter->started); 1939 free_cpumask_var(iter->started);
1940 tracing_start();
1978 fail: 1941 fail:
1979 mutex_unlock(&trace_types_lock); 1942 mutex_unlock(&trace_types_lock);
1980 kfree(iter->trace); 1943 kfree(iter->trace);
@@ -2031,7 +1994,7 @@ static int tracing_open(struct inode *inode, struct file *file)
2031 1994
2032 /* If this file was open for write, then erase contents */ 1995 /* If this file was open for write, then erase contents */
2033 if ((file->f_mode & FMODE_WRITE) && 1996 if ((file->f_mode & FMODE_WRITE) &&
2034 !(file->f_flags & O_APPEND)) { 1997 (file->f_flags & O_TRUNC)) {
2035 long cpu = (long) inode->i_private; 1998 long cpu = (long) inode->i_private;
2036 1999
2037 if (cpu == TRACE_PIPE_ALL_CPU) 2000 if (cpu == TRACE_PIPE_ALL_CPU)
@@ -2053,25 +2016,23 @@ static int tracing_open(struct inode *inode, struct file *file)
2053static void * 2016static void *
2054t_next(struct seq_file *m, void *v, loff_t *pos) 2017t_next(struct seq_file *m, void *v, loff_t *pos)
2055{ 2018{
2056 struct tracer *t = m->private; 2019 struct tracer *t = v;
2057 2020
2058 (*pos)++; 2021 (*pos)++;
2059 2022
2060 if (t) 2023 if (t)
2061 t = t->next; 2024 t = t->next;
2062 2025
2063 m->private = t;
2064
2065 return t; 2026 return t;
2066} 2027}
2067 2028
2068static void *t_start(struct seq_file *m, loff_t *pos) 2029static void *t_start(struct seq_file *m, loff_t *pos)
2069{ 2030{
2070 struct tracer *t = m->private; 2031 struct tracer *t;
2071 loff_t l = 0; 2032 loff_t l = 0;
2072 2033
2073 mutex_lock(&trace_types_lock); 2034 mutex_lock(&trace_types_lock);
2074 for (; t && l < *pos; t = t_next(m, t, &l)) 2035 for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2075 ; 2036 ;
2076 2037
2077 return t; 2038 return t;
@@ -2107,18 +2068,10 @@ static struct seq_operations show_traces_seq_ops = {
2107 2068
2108static int show_traces_open(struct inode *inode, struct file *file) 2069static int show_traces_open(struct inode *inode, struct file *file)
2109{ 2070{
2110 int ret;
2111
2112 if (tracing_disabled) 2071 if (tracing_disabled)
2113 return -ENODEV; 2072 return -ENODEV;
2114 2073
2115 ret = seq_open(file, &show_traces_seq_ops); 2074 return seq_open(file, &show_traces_seq_ops);
2116 if (!ret) {
2117 struct seq_file *m = file->private_data;
2118 m->private = trace_types;
2119 }
2120
2121 return ret;
2122} 2075}
2123 2076
2124static ssize_t 2077static ssize_t
@@ -2191,11 +2144,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2191 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 2144 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2192 return -ENOMEM; 2145 return -ENOMEM;
2193 2146
2194 mutex_lock(&tracing_cpumask_update_lock);
2195 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 2147 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2196 if (err) 2148 if (err)
2197 goto err_unlock; 2149 goto err_unlock;
2198 2150
2151 mutex_lock(&tracing_cpumask_update_lock);
2152
2199 local_irq_disable(); 2153 local_irq_disable();
2200 __raw_spin_lock(&ftrace_max_lock); 2154 __raw_spin_lock(&ftrace_max_lock);
2201 for_each_tracing_cpu(cpu) { 2155 for_each_tracing_cpu(cpu) {
@@ -2223,8 +2177,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2223 return count; 2177 return count;
2224 2178
2225err_unlock: 2179err_unlock:
2226 mutex_unlock(&tracing_cpumask_update_lock); 2180 free_cpumask_var(tracing_cpumask_new);
2227 free_cpumask_var(tracing_cpumask);
2228 2181
2229 return err; 2182 return err;
2230} 2183}
@@ -2266,8 +2219,8 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2266 len += 3; /* "no" and newline */ 2219 len += 3; /* "no" and newline */
2267 } 2220 }
2268 2221
2269 /* +2 for \n and \0 */ 2222 /* +1 for \0 */
2270 buf = kmalloc(len + 2, GFP_KERNEL); 2223 buf = kmalloc(len + 1, GFP_KERNEL);
2271 if (!buf) { 2224 if (!buf) {
2272 mutex_unlock(&trace_types_lock); 2225 mutex_unlock(&trace_types_lock);
2273 return -ENOMEM; 2226 return -ENOMEM;
@@ -2290,7 +2243,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2290 } 2243 }
2291 mutex_unlock(&trace_types_lock); 2244 mutex_unlock(&trace_types_lock);
2292 2245
2293 WARN_ON(r >= len + 2); 2246 WARN_ON(r >= len + 1);
2294 2247
2295 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2248 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2296 2249
@@ -2301,23 +2254,23 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2301/* Try to assign a tracer specific option */ 2254/* Try to assign a tracer specific option */
2302static int set_tracer_option(struct tracer *trace, char *cmp, int neg) 2255static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2303{ 2256{
2304 struct tracer_flags *trace_flags = trace->flags; 2257 struct tracer_flags *tracer_flags = trace->flags;
2305 struct tracer_opt *opts = NULL; 2258 struct tracer_opt *opts = NULL;
2306 int ret = 0, i = 0; 2259 int ret = 0, i = 0;
2307 int len; 2260 int len;
2308 2261
2309 for (i = 0; trace_flags->opts[i].name; i++) { 2262 for (i = 0; tracer_flags->opts[i].name; i++) {
2310 opts = &trace_flags->opts[i]; 2263 opts = &tracer_flags->opts[i];
2311 len = strlen(opts->name); 2264 len = strlen(opts->name);
2312 2265
2313 if (strncmp(cmp, opts->name, len) == 0) { 2266 if (strncmp(cmp, opts->name, len) == 0) {
2314 ret = trace->set_flag(trace_flags->val, 2267 ret = trace->set_flag(tracer_flags->val,
2315 opts->bit, !neg); 2268 opts->bit, !neg);
2316 break; 2269 break;
2317 } 2270 }
2318 } 2271 }
2319 /* Not found */ 2272 /* Not found */
2320 if (!trace_flags->opts[i].name) 2273 if (!tracer_flags->opts[i].name)
2321 return -EINVAL; 2274 return -EINVAL;
2322 2275
2323 /* Refused to handle */ 2276 /* Refused to handle */
@@ -2325,9 +2278,9 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2325 return ret; 2278 return ret;
2326 2279
2327 if (neg) 2280 if (neg)
2328 trace_flags->val &= ~opts->bit; 2281 tracer_flags->val &= ~opts->bit;
2329 else 2282 else
2330 trace_flags->val |= opts->bit; 2283 tracer_flags->val |= opts->bit;
2331 2284
2332 return 0; 2285 return 0;
2333} 2286}
@@ -2342,22 +2295,6 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2342 trace_flags |= mask; 2295 trace_flags |= mask;
2343 else 2296 else
2344 trace_flags &= ~mask; 2297 trace_flags &= ~mask;
2345
2346 if (mask == TRACE_ITER_GLOBAL_CLK) {
2347 u64 (*func)(void);
2348
2349 if (enabled)
2350 func = trace_clock_global;
2351 else
2352 func = trace_clock_local;
2353
2354 mutex_lock(&trace_types_lock);
2355 ring_buffer_set_clock(global_trace.buffer, func);
2356
2357 if (max_tr.buffer)
2358 ring_buffer_set_clock(max_tr.buffer, func);
2359 mutex_unlock(&trace_types_lock);
2360 }
2361} 2298}
2362 2299
2363static ssize_t 2300static ssize_t
@@ -3095,7 +3032,8 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3095 break; 3032 break;
3096 } 3033 }
3097 3034
3098 trace_consume(iter); 3035 if (ret != TRACE_TYPE_NO_CONSUME)
3036 trace_consume(iter);
3099 rem -= count; 3037 rem -= count;
3100 if (!find_next_entry_inc(iter)) { 3038 if (!find_next_entry_inc(iter)) {
3101 rem = 0; 3039 rem = 0;
@@ -3324,6 +3262,62 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3324 return cnt; 3262 return cnt;
3325} 3263}
3326 3264
3265static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
3266 size_t cnt, loff_t *ppos)
3267{
3268 char buf[64];
3269 int bufiter = 0;
3270 int i;
3271
3272 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3273 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
3274 "%s%s%s%s", i ? " " : "",
3275 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3276 i == trace_clock_id ? "]" : "");
3277 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
3278
3279 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
3280}
3281
3282static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3283 size_t cnt, loff_t *fpos)
3284{
3285 char buf[64];
3286 const char *clockstr;
3287 int i;
3288
3289 if (cnt >= sizeof(buf))
3290 return -EINVAL;
3291
3292 if (copy_from_user(&buf, ubuf, cnt))
3293 return -EFAULT;
3294
3295 buf[cnt] = 0;
3296
3297 clockstr = strstrip(buf);
3298
3299 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3300 if (strcmp(trace_clocks[i].name, clockstr) == 0)
3301 break;
3302 }
3303 if (i == ARRAY_SIZE(trace_clocks))
3304 return -EINVAL;
3305
3306 trace_clock_id = i;
3307
3308 mutex_lock(&trace_types_lock);
3309
3310 ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3311 if (max_tr.buffer)
3312 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3313
3314 mutex_unlock(&trace_types_lock);
3315
3316 *fpos += cnt;
3317
3318 return cnt;
3319}
3320
3327static const struct file_operations tracing_max_lat_fops = { 3321static const struct file_operations tracing_max_lat_fops = {
3328 .open = tracing_open_generic, 3322 .open = tracing_open_generic,
3329 .read = tracing_max_lat_read, 3323 .read = tracing_max_lat_read,
@@ -3361,6 +3355,12 @@ static const struct file_operations tracing_mark_fops = {
3361 .write = tracing_mark_write, 3355 .write = tracing_mark_write,
3362}; 3356};
3363 3357
3358static const struct file_operations trace_clock_fops = {
3359 .open = tracing_open_generic,
3360 .read = tracing_clock_read,
3361 .write = tracing_clock_write,
3362};
3363
3364struct ftrace_buffer_info { 3364struct ftrace_buffer_info {
3365 struct trace_array *tr; 3365 struct trace_array *tr;
3366 void *spare; 3366 void *spare;
@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3626 struct trace_seq *s; 3626 struct trace_seq *s;
3627 unsigned long cnt; 3627 unsigned long cnt;
3628 3628
3629 s = kmalloc(sizeof(*s), GFP_ATOMIC); 3629 s = kmalloc(sizeof(*s), GFP_KERNEL);
3630 if (!s) 3630 if (!s)
3631 return ENOMEM; 3631 return ENOMEM;
3632 3632
@@ -3641,9 +3641,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3641 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); 3641 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3642 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 3642 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3643 3643
3644 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3645 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3646
3647 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 3644 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3648 3645
3649 kfree(s); 3646 kfree(s);
@@ -3904,17 +3901,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3904 if (ret < 0) 3901 if (ret < 0)
3905 return ret; 3902 return ret;
3906 3903
3907 switch (val) { 3904 if (val != 0 && val != 1)
3908 case 0:
3909 trace_flags &= ~(1 << index);
3910 break;
3911 case 1:
3912 trace_flags |= 1 << index;
3913 break;
3914
3915 default:
3916 return -EINVAL; 3905 return -EINVAL;
3917 } 3906 set_tracer_flags(1 << index, val);
3918 3907
3919 *ppos += cnt; 3908 *ppos += cnt;
3920 3909
@@ -4082,11 +4071,13 @@ static __init int tracer_init_debugfs(void)
4082 trace_create_file("current_tracer", 0644, d_tracer, 4071 trace_create_file("current_tracer", 0644, d_tracer,
4083 &global_trace, &set_tracer_fops); 4072 &global_trace, &set_tracer_fops);
4084 4073
4074#ifdef CONFIG_TRACER_MAX_TRACE
4085 trace_create_file("tracing_max_latency", 0644, d_tracer, 4075 trace_create_file("tracing_max_latency", 0644, d_tracer,
4086 &tracing_max_latency, &tracing_max_lat_fops); 4076 &tracing_max_latency, &tracing_max_lat_fops);
4087 4077
4088 trace_create_file("tracing_thresh", 0644, d_tracer, 4078 trace_create_file("tracing_thresh", 0644, d_tracer,
4089 &tracing_thresh, &tracing_max_lat_fops); 4079 &tracing_thresh, &tracing_max_lat_fops);
4080#endif
4090 4081
4091 trace_create_file("README", 0444, d_tracer, 4082 trace_create_file("README", 0444, d_tracer,
4092 NULL, &tracing_readme_fops); 4083 NULL, &tracing_readme_fops);
@@ -4103,6 +4094,9 @@ static __init int tracer_init_debugfs(void)
4103 trace_create_file("saved_cmdlines", 0444, d_tracer, 4094 trace_create_file("saved_cmdlines", 0444, d_tracer,
4104 NULL, &tracing_saved_cmdlines_fops); 4095 NULL, &tracing_saved_cmdlines_fops);
4105 4096
4097 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4098 &trace_clock_fops);
4099
4106#ifdef CONFIG_DYNAMIC_FTRACE 4100#ifdef CONFIG_DYNAMIC_FTRACE
4107 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4101 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4108 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4102 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4243,8 +4237,11 @@ static void __ftrace_dump(bool disable_tracing)
4243 iter.pos = -1; 4237 iter.pos = -1;
4244 4238
4245 if (find_next_entry_inc(&iter) != NULL) { 4239 if (find_next_entry_inc(&iter) != NULL) {
4246 print_trace_line(&iter); 4240 int ret;
4247 trace_consume(&iter); 4241
4242 ret = print_trace_line(&iter);
4243 if (ret != TRACE_TYPE_NO_CONSUME)
4244 trace_consume(&iter);
4248 } 4245 }
4249 4246
4250 trace_printk_seq(&iter.seq); 4247 trace_printk_seq(&iter.seq);
@@ -4278,7 +4275,6 @@ void ftrace_dump(void)
4278 4275
4279__init static int tracer_alloc_buffers(void) 4276__init static int tracer_alloc_buffers(void)
4280{ 4277{
4281 struct trace_array_cpu *data;
4282 int ring_buf_size; 4278 int ring_buf_size;
4283 int i; 4279 int i;
4284 int ret = -ENOMEM; 4280 int ret = -ENOMEM;
@@ -4328,7 +4324,7 @@ __init static int tracer_alloc_buffers(void)
4328 4324
4329 /* Allocate the first page for all buffers */ 4325 /* Allocate the first page for all buffers */
4330 for_each_tracing_cpu(i) { 4326 for_each_tracing_cpu(i) {
4331 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4327 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4332 max_tr.data[i] = &per_cpu(max_data, i); 4328 max_tr.data[i] = &per_cpu(max_data, i);
4333 } 4329 }
4334 4330
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ff1ef411a176..ea7e0bcbd539 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -38,8 +38,6 @@ enum trace_type {
38 TRACE_GRAPH_ENT, 38 TRACE_GRAPH_ENT,
39 TRACE_USER_STACK, 39 TRACE_USER_STACK,
40 TRACE_HW_BRANCHES, 40 TRACE_HW_BRANCHES,
41 TRACE_SYSCALL_ENTER,
42 TRACE_SYSCALL_EXIT,
43 TRACE_KMEM_ALLOC, 41 TRACE_KMEM_ALLOC,
44 TRACE_KMEM_FREE, 42 TRACE_KMEM_FREE,
45 TRACE_POWER, 43 TRACE_POWER,
@@ -251,9 +249,6 @@ struct trace_array_cpu {
251 atomic_t disabled; 249 atomic_t disabled;
252 void *buffer_page; /* ring buffer spare */ 250 void *buffer_page; /* ring buffer spare */
253 251
254 /* these fields get copied into max-trace: */
255 unsigned long trace_idx;
256 unsigned long overrun;
257 unsigned long saved_latency; 252 unsigned long saved_latency;
258 unsigned long critical_start; 253 unsigned long critical_start;
259 unsigned long critical_end; 254 unsigned long critical_end;
@@ -261,6 +256,7 @@ struct trace_array_cpu {
261 unsigned long nice; 256 unsigned long nice;
262 unsigned long policy; 257 unsigned long policy;
263 unsigned long rt_priority; 258 unsigned long rt_priority;
259 unsigned long skipped_entries;
264 cycle_t preempt_timestamp; 260 cycle_t preempt_timestamp;
265 pid_t pid; 261 pid_t pid;
266 uid_t uid; 262 uid_t uid;
@@ -334,10 +330,6 @@ extern void __ftrace_bad_type(void);
334 TRACE_KMEM_ALLOC); \ 330 TRACE_KMEM_ALLOC); \
335 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 331 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
336 TRACE_KMEM_FREE); \ 332 TRACE_KMEM_FREE); \
337 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
338 TRACE_SYSCALL_ENTER); \
339 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
340 TRACE_SYSCALL_EXIT); \
341 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ 333 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
342 __ftrace_bad_type(); \ 334 __ftrace_bad_type(); \
343 } while (0) 335 } while (0)
@@ -439,12 +431,13 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
439 431
440struct ring_buffer_event; 432struct ring_buffer_event;
441 433
442struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 434struct ring_buffer_event *
443 int type, 435trace_buffer_lock_reserve(struct ring_buffer *buffer,
444 unsigned long len, 436 int type,
445 unsigned long flags, 437 unsigned long len,
446 int pc); 438 unsigned long flags,
447void trace_buffer_unlock_commit(struct trace_array *tr, 439 int pc);
440void trace_buffer_unlock_commit(struct ring_buffer *buffer,
448 struct ring_buffer_event *event, 441 struct ring_buffer_event *event,
449 unsigned long flags, int pc); 442 unsigned long flags, int pc);
450 443
@@ -454,10 +447,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
454struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 447struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
455 int *ent_cpu, u64 *ent_ts); 448 int *ent_cpu, u64 *ent_ts);
456 449
457void tracing_generic_entry_update(struct trace_entry *entry,
458 unsigned long flags,
459 int pc);
460
461void default_wait_pipe(struct trace_iterator *iter); 450void default_wait_pipe(struct trace_iterator *iter);
462void poll_wait_pipe(struct trace_iterator *iter); 451void poll_wait_pipe(struct trace_iterator *iter);
463 452
@@ -487,6 +476,7 @@ void trace_function(struct trace_array *tr,
487 476
488void trace_graph_return(struct ftrace_graph_ret *trace); 477void trace_graph_return(struct ftrace_graph_ret *trace);
489int trace_graph_entry(struct ftrace_graph_ent *trace); 478int trace_graph_entry(struct ftrace_graph_ent *trace);
479void set_graph_array(struct trace_array *tr);
490 480
491void tracing_start_cmdline_record(void); 481void tracing_start_cmdline_record(void);
492void tracing_stop_cmdline_record(void); 482void tracing_stop_cmdline_record(void);
@@ -498,16 +488,40 @@ void unregister_tracer(struct tracer *type);
498 488
499extern unsigned long nsecs_to_usecs(unsigned long nsecs); 489extern unsigned long nsecs_to_usecs(unsigned long nsecs);
500 490
491#ifdef CONFIG_TRACER_MAX_TRACE
501extern unsigned long tracing_max_latency; 492extern unsigned long tracing_max_latency;
502extern unsigned long tracing_thresh; 493extern unsigned long tracing_thresh;
503 494
504void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 495void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
505void update_max_tr_single(struct trace_array *tr, 496void update_max_tr_single(struct trace_array *tr,
506 struct task_struct *tsk, int cpu); 497 struct task_struct *tsk, int cpu);
498#endif /* CONFIG_TRACER_MAX_TRACE */
499
500#ifdef CONFIG_STACKTRACE
501void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
502 int skip, int pc);
503
504void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
505 int pc);
507 506
508void __trace_stack(struct trace_array *tr, 507void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
509 unsigned long flags, 508 int pc);
510 int skip, int pc); 509#else
510static inline void ftrace_trace_stack(struct trace_array *tr,
511 unsigned long flags, int skip, int pc)
512{
513}
514
515static inline void ftrace_trace_userstack(struct trace_array *tr,
516 unsigned long flags, int pc)
517{
518}
519
520static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
521 int skip, int pc)
522{
523}
524#endif /* CONFIG_STACKTRACE */
511 525
512extern cycle_t ftrace_now(int cpu); 526extern cycle_t ftrace_now(int cpu);
513 527
@@ -533,6 +547,10 @@ extern unsigned long ftrace_update_tot_cnt;
533extern int DYN_FTRACE_TEST_NAME(void); 547extern int DYN_FTRACE_TEST_NAME(void);
534#endif 548#endif
535 549
550extern int ring_buffer_expanded;
551extern bool tracing_selftest_disabled;
552DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
553
536#ifdef CONFIG_FTRACE_STARTUP_TEST 554#ifdef CONFIG_FTRACE_STARTUP_TEST
537extern int trace_selftest_startup_function(struct tracer *trace, 555extern int trace_selftest_startup_function(struct tracer *trace,
538 struct trace_array *tr); 556 struct trace_array *tr);
@@ -566,9 +584,16 @@ extern int
566trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 584trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
567extern int 585extern int
568trace_vprintk(unsigned long ip, const char *fmt, va_list args); 586trace_vprintk(unsigned long ip, const char *fmt, va_list args);
587extern int
588trace_array_vprintk(struct trace_array *tr,
589 unsigned long ip, const char *fmt, va_list args);
590int trace_array_printk(struct trace_array *tr,
591 unsigned long ip, const char *fmt, ...);
569 592
570extern unsigned long trace_flags; 593extern unsigned long trace_flags;
571 594
595extern int trace_clock_id;
596
572/* Standard output formatting function used for function return traces */ 597/* Standard output formatting function used for function return traces */
573#ifdef CONFIG_FUNCTION_GRAPH_TRACER 598#ifdef CONFIG_FUNCTION_GRAPH_TRACER
574extern enum print_line_t print_graph_function(struct trace_iterator *iter); 599extern enum print_line_t print_graph_function(struct trace_iterator *iter);
@@ -615,6 +640,7 @@ print_graph_function(struct trace_iterator *iter)
615 640
616extern struct pid *ftrace_pid_trace; 641extern struct pid *ftrace_pid_trace;
617 642
643#ifdef CONFIG_FUNCTION_TRACER
618static inline int ftrace_trace_task(struct task_struct *task) 644static inline int ftrace_trace_task(struct task_struct *task)
619{ 645{
620 if (!ftrace_pid_trace) 646 if (!ftrace_pid_trace)
@@ -622,6 +648,12 @@ static inline int ftrace_trace_task(struct task_struct *task)
622 648
623 return test_tsk_trace_trace(task); 649 return test_tsk_trace_trace(task);
624} 650}
651#else
652static inline int ftrace_trace_task(struct task_struct *task)
653{
654 return 1;
655}
656#endif
625 657
626/* 658/*
627 * trace_iterator_flags is an enumeration that defines bit 659 * trace_iterator_flags is an enumeration that defines bit
@@ -650,9 +682,8 @@ enum trace_iterator_flags {
650 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 682 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
651 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 683 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
652 TRACE_ITER_LATENCY_FMT = 0x40000, 684 TRACE_ITER_LATENCY_FMT = 0x40000,
653 TRACE_ITER_GLOBAL_CLK = 0x80000, 685 TRACE_ITER_SLEEP_TIME = 0x80000,
654 TRACE_ITER_SLEEP_TIME = 0x100000, 686 TRACE_ITER_GRAPH_TIME = 0x100000,
655 TRACE_ITER_GRAPH_TIME = 0x200000,
656}; 687};
657 688
658/* 689/*
@@ -749,6 +780,7 @@ struct ftrace_event_field {
749 struct list_head link; 780 struct list_head link;
750 char *name; 781 char *name;
751 char *type; 782 char *type;
783 int filter_type;
752 int offset; 784 int offset;
753 int size; 785 int size;
754 int is_signed; 786 int is_signed;
@@ -758,13 +790,15 @@ struct event_filter {
758 int n_preds; 790 int n_preds;
759 struct filter_pred **preds; 791 struct filter_pred **preds;
760 char *filter_string; 792 char *filter_string;
793 bool no_reset;
761}; 794};
762 795
763struct event_subsystem { 796struct event_subsystem {
764 struct list_head list; 797 struct list_head list;
765 const char *name; 798 const char *name;
766 struct dentry *entry; 799 struct dentry *entry;
767 void *filter; 800 struct event_filter *filter;
801 int nr_events;
768}; 802};
769 803
770struct filter_pred; 804struct filter_pred;
@@ -792,6 +826,7 @@ extern int apply_subsystem_event_filter(struct event_subsystem *system,
792 char *filter_string); 826 char *filter_string);
793extern void print_subsystem_event_filter(struct event_subsystem *system, 827extern void print_subsystem_event_filter(struct event_subsystem *system,
794 struct trace_seq *s); 828 struct trace_seq *s);
829extern int filter_assign_type(const char *type);
795 830
796static inline int 831static inline int
797filter_check_discard(struct ftrace_event_call *call, void *rec, 832filter_check_discard(struct ftrace_event_call *call, void *rec,
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a29ef23ffb47..19bfc75d467e 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -41,14 +41,12 @@ void disable_boot_trace(void)
41 41
42static int boot_trace_init(struct trace_array *tr) 42static int boot_trace_init(struct trace_array *tr)
43{ 43{
44 int cpu;
45 boot_trace = tr; 44 boot_trace = tr;
46 45
47 if (!tr) 46 if (!tr)
48 return 0; 47 return 0;
49 48
50 for_each_cpu(cpu, cpu_possible_mask) 49 tracing_reset_online_cpus(tr);
51 tracing_reset(tr, cpu);
52 50
53 tracing_sched_switch_assign_trace(tr); 51 tracing_sched_switch_assign_trace(tr);
54 return 0; 52 return 0;
@@ -132,6 +130,7 @@ struct tracer boot_tracer __read_mostly =
132void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) 130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
133{ 131{
134 struct ring_buffer_event *event; 132 struct ring_buffer_event *event;
133 struct ring_buffer *buffer;
135 struct trace_boot_call *entry; 134 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace; 135 struct trace_array *tr = boot_trace;
137 136
@@ -144,13 +143,14 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
144 sprint_symbol(bt->func, (unsigned long)fn); 143 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable(); 144 preempt_disable();
146 145
147 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, 146 buffer = tr->buffer;
147 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
148 sizeof(*entry), 0, 0); 148 sizeof(*entry), 0, 0);
149 if (!event) 149 if (!event)
150 goto out; 150 goto out;
151 entry = ring_buffer_event_data(event); 151 entry = ring_buffer_event_data(event);
152 entry->boot_call = *bt; 152 entry->boot_call = *bt;
153 trace_buffer_unlock_commit(tr, event, 0, 0); 153 trace_buffer_unlock_commit(buffer, event, 0, 0);
154 out: 154 out:
155 preempt_enable(); 155 preempt_enable();
156} 156}
@@ -158,6 +158,7 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) 158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
159{ 159{
160 struct ring_buffer_event *event; 160 struct ring_buffer_event *event;
161 struct ring_buffer *buffer;
161 struct trace_boot_ret *entry; 162 struct trace_boot_ret *entry;
162 struct trace_array *tr = boot_trace; 163 struct trace_array *tr = boot_trace;
163 164
@@ -167,13 +168,14 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
167 sprint_symbol(bt->func, (unsigned long)fn); 168 sprint_symbol(bt->func, (unsigned long)fn);
168 preempt_disable(); 169 preempt_disable();
169 170
170 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, 171 buffer = tr->buffer;
172 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
171 sizeof(*entry), 0, 0); 173 sizeof(*entry), 0, 0);
172 if (!event) 174 if (!event)
173 goto out; 175 goto out;
174 entry = ring_buffer_event_data(event); 176 entry = ring_buffer_event_data(event);
175 entry->boot_ret = *bt; 177 entry->boot_ret = *bt;
176 trace_buffer_unlock_commit(tr, event, 0, 0); 178 trace_buffer_unlock_commit(buffer, event, 0, 0);
177 out: 179 out:
178 preempt_enable(); 180 preempt_enable();
179} 181}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 5b5895afecfe..11ba5bb4ed0a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id)
14 14
15 mutex_lock(&event_mutex); 15 mutex_lock(&event_mutex);
16 list_for_each_entry(event, &ftrace_events, list) { 16 list_for_each_entry(event, &ftrace_events, list) {
17 if (event->id == event_id) { 17 if (event->id == event_id && event->profile_enable) {
18 ret = event->profile_enable(event); 18 ret = event->profile_enable(event);
19 break; 19 break;
20 } 20 }
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134d..6db005e12487 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore, 26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT( 27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func) 28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(unsigned long long, ret.calltime, calltime)
30 TRACE_FIELD(unsigned long long, ret.rettime, rettime)
31 TRACE_FIELD(unsigned long, ret.overrun, overrun)
29 TRACE_FIELD(int, ret.depth, depth) 32 TRACE_FIELD(int, ret.depth, depth)
30 ), 33 ),
31 TP_RAW_FMT("<-- %lx (%d)") 34 TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index aa08be69a1b6..78b1ed230177 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -17,6 +17,8 @@
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19 19
20#include <asm/setup.h>
21
20#include "trace_output.h" 22#include "trace_output.h"
21 23
22#define TRACE_SYSTEM "TRACE_SYSTEM" 24#define TRACE_SYSTEM "TRACE_SYSTEM"
@@ -25,8 +27,9 @@ DEFINE_MUTEX(event_mutex);
25 27
26LIST_HEAD(ftrace_events); 28LIST_HEAD(ftrace_events);
27 29
28int trace_define_field(struct ftrace_event_call *call, char *type, 30int trace_define_field(struct ftrace_event_call *call, const char *type,
29 char *name, int offset, int size, int is_signed) 31 const char *name, int offset, int size, int is_signed,
32 int filter_type)
30{ 33{
31 struct ftrace_event_field *field; 34 struct ftrace_event_field *field;
32 35
@@ -42,9 +45,15 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
42 if (!field->type) 45 if (!field->type)
43 goto err; 46 goto err;
44 47
48 if (filter_type == FILTER_OTHER)
49 field->filter_type = filter_assign_type(type);
50 else
51 field->filter_type = filter_type;
52
45 field->offset = offset; 53 field->offset = offset;
46 field->size = size; 54 field->size = size;
47 field->is_signed = is_signed; 55 field->is_signed = is_signed;
56
48 list_add(&field->link, &call->fields); 57 list_add(&field->link, &call->fields);
49 58
50 return 0; 59 return 0;
@@ -60,6 +69,29 @@ err:
60} 69}
61EXPORT_SYMBOL_GPL(trace_define_field); 70EXPORT_SYMBOL_GPL(trace_define_field);
62 71
72#define __common_field(type, item) \
73 ret = trace_define_field(call, #type, "common_" #item, \
74 offsetof(typeof(ent), item), \
75 sizeof(ent.item), \
76 is_signed_type(type), FILTER_OTHER); \
77 if (ret) \
78 return ret;
79
80int trace_define_common_fields(struct ftrace_event_call *call)
81{
82 int ret;
83 struct trace_entry ent;
84
85 __common_field(unsigned short, type);
86 __common_field(unsigned char, flags);
87 __common_field(unsigned char, preempt_count);
88 __common_field(int, pid);
89 __common_field(int, tgid);
90
91 return ret;
92}
93EXPORT_SYMBOL_GPL(trace_define_common_fields);
94
63#ifdef CONFIG_MODULES 95#ifdef CONFIG_MODULES
64 96
65static void trace_destroy_fields(struct ftrace_event_call *call) 97static void trace_destroy_fields(struct ftrace_event_call *call)
@@ -84,14 +116,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
84 if (call->enabled) { 116 if (call->enabled) {
85 call->enabled = 0; 117 call->enabled = 0;
86 tracing_stop_cmdline_record(); 118 tracing_stop_cmdline_record();
87 call->unregfunc(); 119 call->unregfunc(call->data);
88 } 120 }
89 break; 121 break;
90 case 1: 122 case 1:
91 if (!call->enabled) { 123 if (!call->enabled) {
92 call->enabled = 1; 124 call->enabled = 1;
93 tracing_start_cmdline_record(); 125 tracing_start_cmdline_record();
94 call->regfunc(); 126 call->regfunc(call->data);
95 } 127 }
96 break; 128 break;
97 } 129 }
@@ -300,10 +332,18 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
300 332
301static void *t_start(struct seq_file *m, loff_t *pos) 333static void *t_start(struct seq_file *m, loff_t *pos)
302{ 334{
335 struct ftrace_event_call *call = NULL;
336 loff_t l;
337
303 mutex_lock(&event_mutex); 338 mutex_lock(&event_mutex);
304 if (*pos == 0) 339
305 m->private = ftrace_events.next; 340 m->private = ftrace_events.next;
306 return t_next(m, NULL, pos); 341 for (l = 0; l <= *pos; ) {
342 call = t_next(m, NULL, &l);
343 if (!call)
344 break;
345 }
346 return call;
307} 347}
308 348
309static void * 349static void *
@@ -332,10 +372,18 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
332 372
333static void *s_start(struct seq_file *m, loff_t *pos) 373static void *s_start(struct seq_file *m, loff_t *pos)
334{ 374{
375 struct ftrace_event_call *call = NULL;
376 loff_t l;
377
335 mutex_lock(&event_mutex); 378 mutex_lock(&event_mutex);
336 if (*pos == 0) 379
337 m->private = ftrace_events.next; 380 m->private = ftrace_events.next;
338 return s_next(m, NULL, pos); 381 for (l = 0; l <= *pos; ) {
382 call = s_next(m, NULL, &l);
383 if (!call)
384 break;
385 }
386 return call;
339} 387}
340 388
341static int t_show(struct seq_file *m, void *v) 389static int t_show(struct seq_file *m, void *v)
@@ -360,7 +408,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
360 const struct seq_operations *seq_ops; 408 const struct seq_operations *seq_ops;
361 409
362 if ((file->f_mode & FMODE_WRITE) && 410 if ((file->f_mode & FMODE_WRITE) &&
363 !(file->f_flags & O_APPEND)) 411 (file->f_flags & O_TRUNC))
364 ftrace_clear_events(); 412 ftrace_clear_events();
365 413
366 seq_ops = inode->i_private; 414 seq_ops = inode->i_private;
@@ -558,7 +606,7 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
558 trace_seq_printf(s, "format:\n"); 606 trace_seq_printf(s, "format:\n");
559 trace_write_header(s); 607 trace_write_header(s);
560 608
561 r = call->show_format(s); 609 r = call->show_format(call, s);
562 if (!r) { 610 if (!r) {
563 /* 611 /*
564 * ug! The format output is bigger than a PAGE!! 612 * ug! The format output is bigger than a PAGE!!
@@ -833,8 +881,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
833 881
834 /* First see if we did not already create this dir */ 882 /* First see if we did not already create this dir */
835 list_for_each_entry(system, &event_subsystems, list) { 883 list_for_each_entry(system, &event_subsystems, list) {
836 if (strcmp(system->name, name) == 0) 884 if (strcmp(system->name, name) == 0) {
885 system->nr_events++;
837 return system->entry; 886 return system->entry;
887 }
838 } 888 }
839 889
840 /* need to create new entry */ 890 /* need to create new entry */
@@ -853,6 +903,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
853 return d_events; 903 return d_events;
854 } 904 }
855 905
906 system->nr_events = 1;
856 system->name = kstrdup(name, GFP_KERNEL); 907 system->name = kstrdup(name, GFP_KERNEL);
857 if (!system->name) { 908 if (!system->name) {
858 debugfs_remove(system->entry); 909 debugfs_remove(system->entry);
@@ -904,15 +955,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
904 if (strcmp(call->system, TRACE_SYSTEM) != 0) 955 if (strcmp(call->system, TRACE_SYSTEM) != 0)
905 d_events = event_subsystem_dir(call->system, d_events); 956 d_events = event_subsystem_dir(call->system, d_events);
906 957
907 if (call->raw_init) {
908 ret = call->raw_init();
909 if (ret < 0) {
910 pr_warning("Could not initialize trace point"
911 " events/%s\n", call->name);
912 return ret;
913 }
914 }
915
916 call->dir = debugfs_create_dir(call->name, d_events); 958 call->dir = debugfs_create_dir(call->name, d_events);
917 if (!call->dir) { 959 if (!call->dir) {
918 pr_warning("Could not create debugfs " 960 pr_warning("Could not create debugfs "
@@ -924,12 +966,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
924 entry = trace_create_file("enable", 0644, call->dir, call, 966 entry = trace_create_file("enable", 0644, call->dir, call,
925 enable); 967 enable);
926 968
927 if (call->id) 969 if (call->id && call->profile_enable)
928 entry = trace_create_file("id", 0444, call->dir, call, 970 entry = trace_create_file("id", 0444, call->dir, call,
929 id); 971 id);
930 972
931 if (call->define_fields) { 973 if (call->define_fields) {
932 ret = call->define_fields(); 974 ret = call->define_fields(call);
933 if (ret < 0) { 975 if (ret < 0) {
934 pr_warning("Could not initialize trace point" 976 pr_warning("Could not initialize trace point"
935 " events/%s\n", call->name); 977 " events/%s\n", call->name);
@@ -971,6 +1013,32 @@ struct ftrace_module_file_ops {
971 struct file_operations filter; 1013 struct file_operations filter;
972}; 1014};
973 1015
1016static void remove_subsystem_dir(const char *name)
1017{
1018 struct event_subsystem *system;
1019
1020 if (strcmp(name, TRACE_SYSTEM) == 0)
1021 return;
1022
1023 list_for_each_entry(system, &event_subsystems, list) {
1024 if (strcmp(system->name, name) == 0) {
1025 if (!--system->nr_events) {
1026 struct event_filter *filter = system->filter;
1027
1028 debugfs_remove_recursive(system->entry);
1029 list_del(&system->list);
1030 if (filter) {
1031 kfree(filter->filter_string);
1032 kfree(filter);
1033 }
1034 kfree(system->name);
1035 kfree(system);
1036 }
1037 break;
1038 }
1039 }
1040}
1041
974static struct ftrace_module_file_ops * 1042static struct ftrace_module_file_ops *
975trace_create_file_ops(struct module *mod) 1043trace_create_file_ops(struct module *mod)
976{ 1044{
@@ -1011,6 +1079,7 @@ static void trace_module_add_events(struct module *mod)
1011 struct ftrace_module_file_ops *file_ops = NULL; 1079 struct ftrace_module_file_ops *file_ops = NULL;
1012 struct ftrace_event_call *call, *start, *end; 1080 struct ftrace_event_call *call, *start, *end;
1013 struct dentry *d_events; 1081 struct dentry *d_events;
1082 int ret;
1014 1083
1015 start = mod->trace_events; 1084 start = mod->trace_events;
1016 end = mod->trace_events + mod->num_trace_events; 1085 end = mod->trace_events + mod->num_trace_events;
@@ -1026,7 +1095,15 @@ static void trace_module_add_events(struct module *mod)
1026 /* The linker may leave blanks */ 1095 /* The linker may leave blanks */
1027 if (!call->name) 1096 if (!call->name)
1028 continue; 1097 continue;
1029 1098 if (call->raw_init) {
1099 ret = call->raw_init();
1100 if (ret < 0) {
1101 if (ret != -ENOSYS)
1102 pr_warning("Could not initialize trace "
1103 "point events/%s\n", call->name);
1104 continue;
1105 }
1106 }
1030 /* 1107 /*
1031 * This module has events, create file ops for this module 1108 * This module has events, create file ops for this module
1032 * if not already done. 1109 * if not already done.
@@ -1061,6 +1138,7 @@ static void trace_module_remove_events(struct module *mod)
1061 list_del(&call->list); 1138 list_del(&call->list);
1062 trace_destroy_fields(call); 1139 trace_destroy_fields(call);
1063 destroy_preds(call); 1140 destroy_preds(call);
1141 remove_subsystem_dir(call->system);
1064 } 1142 }
1065 } 1143 }
1066 1144
@@ -1117,6 +1195,18 @@ struct notifier_block trace_module_nb = {
1117extern struct ftrace_event_call __start_ftrace_events[]; 1195extern struct ftrace_event_call __start_ftrace_events[];
1118extern struct ftrace_event_call __stop_ftrace_events[]; 1196extern struct ftrace_event_call __stop_ftrace_events[];
1119 1197
1198static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1199
1200static __init int setup_trace_event(char *str)
1201{
1202 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1203 ring_buffer_expanded = 1;
1204 tracing_selftest_disabled = 1;
1205
1206 return 1;
1207}
1208__setup("trace_event=", setup_trace_event);
1209
1120static __init int event_trace_init(void) 1210static __init int event_trace_init(void)
1121{ 1211{
1122 struct ftrace_event_call *call; 1212 struct ftrace_event_call *call;
@@ -1124,6 +1214,8 @@ static __init int event_trace_init(void)
1124 struct dentry *entry; 1214 struct dentry *entry;
1125 struct dentry *d_events; 1215 struct dentry *d_events;
1126 int ret; 1216 int ret;
1217 char *buf = bootup_event_buf;
1218 char *token;
1127 1219
1128 d_tracer = tracing_init_dentry(); 1220 d_tracer = tracing_init_dentry();
1129 if (!d_tracer) 1221 if (!d_tracer)
@@ -1163,12 +1255,34 @@ static __init int event_trace_init(void)
1163 /* The linker may leave blanks */ 1255 /* The linker may leave blanks */
1164 if (!call->name) 1256 if (!call->name)
1165 continue; 1257 continue;
1258 if (call->raw_init) {
1259 ret = call->raw_init();
1260 if (ret < 0) {
1261 if (ret != -ENOSYS)
1262 pr_warning("Could not initialize trace "
1263 "point events/%s\n", call->name);
1264 continue;
1265 }
1266 }
1166 list_add(&call->list, &ftrace_events); 1267 list_add(&call->list, &ftrace_events);
1167 event_create_dir(call, d_events, &ftrace_event_id_fops, 1268 event_create_dir(call, d_events, &ftrace_event_id_fops,
1168 &ftrace_enable_fops, &ftrace_event_filter_fops, 1269 &ftrace_enable_fops, &ftrace_event_filter_fops,
1169 &ftrace_event_format_fops); 1270 &ftrace_event_format_fops);
1170 } 1271 }
1171 1272
1273 while (true) {
1274 token = strsep(&buf, ",");
1275
1276 if (!token)
1277 break;
1278 if (!*token)
1279 continue;
1280
1281 ret = ftrace_set_clr_event(token, 1);
1282 if (ret)
1283 pr_warning("Failed to enable trace event: %s\n", token);
1284 }
1285
1172 ret = register_module_notifier(&trace_module_nb); 1286 ret = register_module_notifier(&trace_module_nb);
1173 if (ret) 1287 if (ret)
1174 pr_warning("Failed to register trace events module notifier\n"); 1288 pr_warning("Failed to register trace events module notifier\n");
@@ -1324,6 +1438,7 @@ static void
1324function_test_events_call(unsigned long ip, unsigned long parent_ip) 1438function_test_events_call(unsigned long ip, unsigned long parent_ip)
1325{ 1439{
1326 struct ring_buffer_event *event; 1440 struct ring_buffer_event *event;
1441 struct ring_buffer *buffer;
1327 struct ftrace_entry *entry; 1442 struct ftrace_entry *entry;
1328 unsigned long flags; 1443 unsigned long flags;
1329 long disabled; 1444 long disabled;
@@ -1341,7 +1456,8 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1341 1456
1342 local_save_flags(flags); 1457 local_save_flags(flags);
1343 1458
1344 event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), 1459 event = trace_current_buffer_lock_reserve(&buffer,
1460 TRACE_FN, sizeof(*entry),
1345 flags, pc); 1461 flags, pc);
1346 if (!event) 1462 if (!event)
1347 goto out; 1463 goto out;
@@ -1349,7 +1465,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1349 entry->ip = ip; 1465 entry->ip = ip;
1350 entry->parent_ip = parent_ip; 1466 entry->parent_ip = parent_ip;
1351 1467
1352 trace_nowake_buffer_unlock_commit(event, flags, pc); 1468 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1353 1469
1354 out: 1470 out:
1355 atomic_dec(&per_cpu(test_event_disable, cpu)); 1471 atomic_dec(&per_cpu(test_event_disable, cpu));
@@ -1376,10 +1492,10 @@ static __init void event_trace_self_test_with_function(void)
1376 1492
1377static __init int event_trace_self_tests_init(void) 1493static __init int event_trace_self_tests_init(void)
1378{ 1494{
1379 1495 if (!tracing_selftest_disabled) {
1380 event_trace_self_tests(); 1496 event_trace_self_tests();
1381 1497 event_trace_self_test_with_function();
1382 event_trace_self_test_with_function(); 1498 }
1383 1499
1384 return 0; 1500 return 0;
1385} 1501}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index db6e54bdb596..93660fbbf629 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,8 +27,6 @@
27#include "trace.h" 27#include "trace.h"
28#include "trace_output.h" 28#include "trace_output.h"
29 29
30static DEFINE_MUTEX(filter_mutex);
31
32enum filter_op_ids 30enum filter_op_ids
33{ 31{
34 OP_OR, 32 OP_OR,
@@ -165,6 +163,20 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
165 return match; 163 return match;
166} 164}
167 165
166/* Filter predicate for char * pointers */
167static int filter_pred_pchar(struct filter_pred *pred, void *event,
168 int val1, int val2)
169{
170 char **addr = (char **)(event + pred->offset);
171 int cmp, match;
172
173 cmp = strncmp(*addr, pred->str_val, pred->str_len);
174
175 match = (!cmp) ^ pred->not;
176
177 return match;
178}
179
168/* 180/*
169 * Filter predicate for dynamic sized arrays of characters. 181 * Filter predicate for dynamic sized arrays of characters.
170 * These are implemented through a list of strings at the end 182 * These are implemented through a list of strings at the end
@@ -178,11 +190,13 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
178static int filter_pred_strloc(struct filter_pred *pred, void *event, 190static int filter_pred_strloc(struct filter_pred *pred, void *event,
179 int val1, int val2) 191 int val1, int val2)
180{ 192{
181 int str_loc = *(int *)(event + pred->offset); 193 u32 str_item = *(u32 *)(event + pred->offset);
194 int str_loc = str_item & 0xffff;
195 int str_len = str_item >> 16;
182 char *addr = (char *)(event + str_loc); 196 char *addr = (char *)(event + str_loc);
183 int cmp, match; 197 int cmp, match;
184 198
185 cmp = strncmp(addr, pred->str_val, pred->str_len); 199 cmp = strncmp(addr, pred->str_val, str_len);
186 200
187 match = (!cmp) ^ pred->not; 201 match = (!cmp) ^ pred->not;
188 202
@@ -294,12 +308,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
294{ 308{
295 struct event_filter *filter = call->filter; 309 struct event_filter *filter = call->filter;
296 310
297 mutex_lock(&filter_mutex); 311 mutex_lock(&event_mutex);
298 if (filter->filter_string) 312 if (filter && filter->filter_string)
299 trace_seq_printf(s, "%s\n", filter->filter_string); 313 trace_seq_printf(s, "%s\n", filter->filter_string);
300 else 314 else
301 trace_seq_printf(s, "none\n"); 315 trace_seq_printf(s, "none\n");
302 mutex_unlock(&filter_mutex); 316 mutex_unlock(&event_mutex);
303} 317}
304 318
305void print_subsystem_event_filter(struct event_subsystem *system, 319void print_subsystem_event_filter(struct event_subsystem *system,
@@ -307,12 +321,12 @@ void print_subsystem_event_filter(struct event_subsystem *system,
307{ 321{
308 struct event_filter *filter = system->filter; 322 struct event_filter *filter = system->filter;
309 323
310 mutex_lock(&filter_mutex); 324 mutex_lock(&event_mutex);
311 if (filter->filter_string) 325 if (filter && filter->filter_string)
312 trace_seq_printf(s, "%s\n", filter->filter_string); 326 trace_seq_printf(s, "%s\n", filter->filter_string);
313 else 327 else
314 trace_seq_printf(s, "none\n"); 328 trace_seq_printf(s, "none\n");
315 mutex_unlock(&filter_mutex); 329 mutex_unlock(&event_mutex);
316} 330}
317 331
318static struct ftrace_event_field * 332static struct ftrace_event_field *
@@ -376,26 +390,32 @@ void destroy_preds(struct ftrace_event_call *call)
376 struct event_filter *filter = call->filter; 390 struct event_filter *filter = call->filter;
377 int i; 391 int i;
378 392
393 if (!filter)
394 return;
395
379 for (i = 0; i < MAX_FILTER_PRED; i++) { 396 for (i = 0; i < MAX_FILTER_PRED; i++) {
380 if (filter->preds[i]) 397 if (filter->preds[i])
381 filter_free_pred(filter->preds[i]); 398 filter_free_pred(filter->preds[i]);
382 } 399 }
383 kfree(filter->preds); 400 kfree(filter->preds);
401 kfree(filter->filter_string);
384 kfree(filter); 402 kfree(filter);
385 call->filter = NULL; 403 call->filter = NULL;
386} 404}
387 405
388int init_preds(struct ftrace_event_call *call) 406static int init_preds(struct ftrace_event_call *call)
389{ 407{
390 struct event_filter *filter; 408 struct event_filter *filter;
391 struct filter_pred *pred; 409 struct filter_pred *pred;
392 int i; 410 int i;
393 411
412 if (call->filter)
413 return 0;
414
394 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); 415 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
395 if (!call->filter) 416 if (!call->filter)
396 return -ENOMEM; 417 return -ENOMEM;
397 418
398 call->filter_active = 0;
399 filter->n_preds = 0; 419 filter->n_preds = 0;
400 420
401 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); 421 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
@@ -417,33 +437,56 @@ oom:
417 437
418 return -ENOMEM; 438 return -ENOMEM;
419} 439}
420EXPORT_SYMBOL_GPL(init_preds);
421 440
422static void filter_free_subsystem_preds(struct event_subsystem *system) 441static int init_subsystem_preds(struct event_subsystem *system)
423{ 442{
424 struct event_filter *filter = system->filter;
425 struct ftrace_event_call *call; 443 struct ftrace_event_call *call;
426 int i; 444 int err;
427 445
428 if (filter->n_preds) { 446 list_for_each_entry(call, &ftrace_events, list) {
429 for (i = 0; i < filter->n_preds; i++) 447 if (!call->define_fields)
430 filter_free_pred(filter->preds[i]); 448 continue;
431 kfree(filter->preds); 449
432 filter->preds = NULL; 450 if (strcmp(call->system, system->name) != 0)
433 filter->n_preds = 0; 451 continue;
452
453 err = init_preds(call);
454 if (err)
455 return err;
434 } 456 }
435 457
436 mutex_lock(&event_mutex); 458 return 0;
459}
460
461enum {
462 FILTER_DISABLE_ALL,
463 FILTER_INIT_NO_RESET,
464 FILTER_SKIP_NO_RESET,
465};
466
467static void filter_free_subsystem_preds(struct event_subsystem *system,
468 int flag)
469{
470 struct ftrace_event_call *call;
471
437 list_for_each_entry(call, &ftrace_events, list) { 472 list_for_each_entry(call, &ftrace_events, list) {
438 if (!call->define_fields) 473 if (!call->define_fields)
439 continue; 474 continue;
440 475
441 if (!strcmp(call->system, system->name)) { 476 if (strcmp(call->system, system->name) != 0)
442 filter_disable_preds(call); 477 continue;
443 remove_filter_string(call->filter); 478
479 if (flag == FILTER_INIT_NO_RESET) {
480 call->filter->no_reset = false;
481 continue;
444 } 482 }
483
484 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
485 continue;
486
487 filter_disable_preds(call);
488 remove_filter_string(call->filter);
445 } 489 }
446 mutex_unlock(&event_mutex);
447} 490}
448 491
449static int filter_add_pred_fn(struct filter_parse_state *ps, 492static int filter_add_pred_fn(struct filter_parse_state *ps,
@@ -471,12 +514,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
471 return 0; 514 return 0;
472} 515}
473 516
474enum { 517int filter_assign_type(const char *type)
475 FILTER_STATIC_STRING = 1,
476 FILTER_DYN_STRING
477};
478
479static int is_string_field(const char *type)
480{ 518{
481 if (strstr(type, "__data_loc") && strstr(type, "char")) 519 if (strstr(type, "__data_loc") && strstr(type, "char"))
482 return FILTER_DYN_STRING; 520 return FILTER_DYN_STRING;
@@ -484,12 +522,19 @@ static int is_string_field(const char *type)
484 if (strchr(type, '[') && strstr(type, "char")) 522 if (strchr(type, '[') && strstr(type, "char"))
485 return FILTER_STATIC_STRING; 523 return FILTER_STATIC_STRING;
486 524
487 return 0; 525 return FILTER_OTHER;
526}
527
528static bool is_string_field(struct ftrace_event_field *field)
529{
530 return field->filter_type == FILTER_DYN_STRING ||
531 field->filter_type == FILTER_STATIC_STRING ||
532 field->filter_type == FILTER_PTR_STRING;
488} 533}
489 534
490static int is_legal_op(struct ftrace_event_field *field, int op) 535static int is_legal_op(struct ftrace_event_field *field, int op)
491{ 536{
492 if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) 537 if (is_string_field(field) && (op != OP_EQ && op != OP_NE))
493 return 0; 538 return 0;
494 539
495 return 1; 540 return 1;
@@ -540,21 +585,24 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
540 585
541static int filter_add_pred(struct filter_parse_state *ps, 586static int filter_add_pred(struct filter_parse_state *ps,
542 struct ftrace_event_call *call, 587 struct ftrace_event_call *call,
543 struct filter_pred *pred) 588 struct filter_pred *pred,
589 bool dry_run)
544{ 590{
545 struct ftrace_event_field *field; 591 struct ftrace_event_field *field;
546 filter_pred_fn_t fn; 592 filter_pred_fn_t fn;
547 unsigned long long val; 593 unsigned long long val;
548 int string_type; 594 int ret;
549 595
550 pred->fn = filter_pred_none; 596 pred->fn = filter_pred_none;
551 597
552 if (pred->op == OP_AND) { 598 if (pred->op == OP_AND) {
553 pred->pop_n = 2; 599 pred->pop_n = 2;
554 return filter_add_pred_fn(ps, call, pred, filter_pred_and); 600 fn = filter_pred_and;
601 goto add_pred_fn;
555 } else if (pred->op == OP_OR) { 602 } else if (pred->op == OP_OR) {
556 pred->pop_n = 2; 603 pred->pop_n = 2;
557 return filter_add_pred_fn(ps, call, pred, filter_pred_or); 604 fn = filter_pred_or;
605 goto add_pred_fn;
558 } 606 }
559 607
560 field = find_event_field(call, pred->field_name); 608 field = find_event_field(call, pred->field_name);
@@ -570,62 +618,55 @@ static int filter_add_pred(struct filter_parse_state *ps,
570 return -EINVAL; 618 return -EINVAL;
571 } 619 }
572 620
573 string_type = is_string_field(field->type); 621 if (is_string_field(field)) {
574 if (string_type) { 622 pred->str_len = field->size;
575 if (string_type == FILTER_STATIC_STRING) 623
624 if (field->filter_type == FILTER_STATIC_STRING)
576 fn = filter_pred_string; 625 fn = filter_pred_string;
577 else 626 else if (field->filter_type == FILTER_DYN_STRING)
578 fn = filter_pred_strloc; 627 fn = filter_pred_strloc;
579 pred->str_len = field->size; 628 else {
580 if (pred->op == OP_NE) 629 fn = filter_pred_pchar;
581 pred->not = 1; 630 pred->str_len = strlen(pred->str_val);
582 return filter_add_pred_fn(ps, call, pred, fn); 631 }
583 } else { 632 } else {
584 if (strict_strtoull(pred->str_val, 0, &val)) { 633 if (field->is_signed)
634 ret = strict_strtoll(pred->str_val, 0, &val);
635 else
636 ret = strict_strtoull(pred->str_val, 0, &val);
637 if (ret) {
585 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 638 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
586 return -EINVAL; 639 return -EINVAL;
587 } 640 }
588 pred->val = val; 641 pred->val = val;
589 }
590 642
591 fn = select_comparison_fn(pred->op, field->size, field->is_signed); 643 fn = select_comparison_fn(pred->op, field->size,
592 if (!fn) { 644 field->is_signed);
593 parse_error(ps, FILT_ERR_INVALID_OP, 0); 645 if (!fn) {
594 return -EINVAL; 646 parse_error(ps, FILT_ERR_INVALID_OP, 0);
647 return -EINVAL;
648 }
595 } 649 }
596 650
597 if (pred->op == OP_NE) 651 if (pred->op == OP_NE)
598 pred->not = 1; 652 pred->not = 1;
599 653
600 return filter_add_pred_fn(ps, call, pred, fn); 654add_pred_fn:
655 if (!dry_run)
656 return filter_add_pred_fn(ps, call, pred, fn);
657 return 0;
601} 658}
602 659
603static int filter_add_subsystem_pred(struct filter_parse_state *ps, 660static int filter_add_subsystem_pred(struct filter_parse_state *ps,
604 struct event_subsystem *system, 661 struct event_subsystem *system,
605 struct filter_pred *pred, 662 struct filter_pred *pred,
606 char *filter_string) 663 char *filter_string,
664 bool dry_run)
607{ 665{
608 struct event_filter *filter = system->filter;
609 struct ftrace_event_call *call; 666 struct ftrace_event_call *call;
610 int err = 0; 667 int err = 0;
668 bool fail = true;
611 669
612 if (!filter->preds) {
613 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
614 GFP_KERNEL);
615
616 if (!filter->preds)
617 return -ENOMEM;
618 }
619
620 if (filter->n_preds == MAX_FILTER_PRED) {
621 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
622 return -ENOSPC;
623 }
624
625 filter->preds[filter->n_preds] = pred;
626 filter->n_preds++;
627
628 mutex_lock(&event_mutex);
629 list_for_each_entry(call, &ftrace_events, list) { 670 list_for_each_entry(call, &ftrace_events, list) {
630 671
631 if (!call->define_fields) 672 if (!call->define_fields)
@@ -634,18 +675,24 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
634 if (strcmp(call->system, system->name)) 675 if (strcmp(call->system, system->name))
635 continue; 676 continue;
636 677
637 err = filter_add_pred(ps, call, pred); 678 if (call->filter->no_reset)
638 if (err) { 679 continue;
639 mutex_unlock(&event_mutex); 680
640 filter_free_subsystem_preds(system); 681 err = filter_add_pred(ps, call, pred, dry_run);
641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 682 if (err)
642 goto out; 683 call->filter->no_reset = true;
643 } 684 else
644 replace_filter_string(call->filter, filter_string); 685 fail = false;
686
687 if (!dry_run)
688 replace_filter_string(call->filter, filter_string);
645 } 689 }
646 mutex_unlock(&event_mutex); 690
647out: 691 if (fail) {
648 return err; 692 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
693 return err;
694 }
695 return 0;
649} 696}
650 697
651static void parse_init(struct filter_parse_state *ps, 698static void parse_init(struct filter_parse_state *ps,
@@ -1004,12 +1051,14 @@ static int check_preds(struct filter_parse_state *ps)
1004static int replace_preds(struct event_subsystem *system, 1051static int replace_preds(struct event_subsystem *system,
1005 struct ftrace_event_call *call, 1052 struct ftrace_event_call *call,
1006 struct filter_parse_state *ps, 1053 struct filter_parse_state *ps,
1007 char *filter_string) 1054 char *filter_string,
1055 bool dry_run)
1008{ 1056{
1009 char *operand1 = NULL, *operand2 = NULL; 1057 char *operand1 = NULL, *operand2 = NULL;
1010 struct filter_pred *pred; 1058 struct filter_pred *pred;
1011 struct postfix_elt *elt; 1059 struct postfix_elt *elt;
1012 int err; 1060 int err;
1061 int n_preds = 0;
1013 1062
1014 err = check_preds(ps); 1063 err = check_preds(ps);
1015 if (err) 1064 if (err)
@@ -1028,19 +1077,14 @@ static int replace_preds(struct event_subsystem *system,
1028 continue; 1077 continue;
1029 } 1078 }
1030 1079
1080 if (n_preds++ == MAX_FILTER_PRED) {
1081 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1082 return -ENOSPC;
1083 }
1084
1031 if (elt->op == OP_AND || elt->op == OP_OR) { 1085 if (elt->op == OP_AND || elt->op == OP_OR) {
1032 pred = create_logical_pred(elt->op); 1086 pred = create_logical_pred(elt->op);
1033 if (call) { 1087 goto add_pred;
1034 err = filter_add_pred(ps, call, pred);
1035 filter_free_pred(pred);
1036 } else
1037 err = filter_add_subsystem_pred(ps, system,
1038 pred, filter_string);
1039 if (err)
1040 return err;
1041
1042 operand1 = operand2 = NULL;
1043 continue;
1044 } 1088 }
1045 1089
1046 if (!operand1 || !operand2) { 1090 if (!operand1 || !operand2) {
@@ -1049,12 +1093,15 @@ static int replace_preds(struct event_subsystem *system,
1049 } 1093 }
1050 1094
1051 pred = create_pred(elt->op, operand1, operand2); 1095 pred = create_pred(elt->op, operand1, operand2);
1052 if (call) { 1096add_pred:
1053 err = filter_add_pred(ps, call, pred); 1097 if (!pred)
1054 filter_free_pred(pred); 1098 return -ENOMEM;
1055 } else 1099 if (call)
1100 err = filter_add_pred(ps, call, pred, false);
1101 else
1056 err = filter_add_subsystem_pred(ps, system, pred, 1102 err = filter_add_subsystem_pred(ps, system, pred,
1057 filter_string); 1103 filter_string, dry_run);
1104 filter_free_pred(pred);
1058 if (err) 1105 if (err)
1059 return err; 1106 return err;
1060 1107
@@ -1070,12 +1117,16 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1070 1117
1071 struct filter_parse_state *ps; 1118 struct filter_parse_state *ps;
1072 1119
1073 mutex_lock(&filter_mutex); 1120 mutex_lock(&event_mutex);
1121
1122 err = init_preds(call);
1123 if (err)
1124 goto out_unlock;
1074 1125
1075 if (!strcmp(strstrip(filter_string), "0")) { 1126 if (!strcmp(strstrip(filter_string), "0")) {
1076 filter_disable_preds(call); 1127 filter_disable_preds(call);
1077 remove_filter_string(call->filter); 1128 remove_filter_string(call->filter);
1078 mutex_unlock(&filter_mutex); 1129 mutex_unlock(&event_mutex);
1079 return 0; 1130 return 0;
1080 } 1131 }
1081 1132
@@ -1094,7 +1145,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1094 goto out; 1145 goto out;
1095 } 1146 }
1096 1147
1097 err = replace_preds(NULL, call, ps, filter_string); 1148 err = replace_preds(NULL, call, ps, filter_string, false);
1098 if (err) 1149 if (err)
1099 append_filter_err(ps, call->filter); 1150 append_filter_err(ps, call->filter);
1100 1151
@@ -1103,7 +1154,7 @@ out:
1103 postfix_clear(ps); 1154 postfix_clear(ps);
1104 kfree(ps); 1155 kfree(ps);
1105out_unlock: 1156out_unlock:
1106 mutex_unlock(&filter_mutex); 1157 mutex_unlock(&event_mutex);
1107 1158
1108 return err; 1159 return err;
1109} 1160}
@@ -1115,12 +1166,16 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1115 1166
1116 struct filter_parse_state *ps; 1167 struct filter_parse_state *ps;
1117 1168
1118 mutex_lock(&filter_mutex); 1169 mutex_lock(&event_mutex);
1170
1171 err = init_subsystem_preds(system);
1172 if (err)
1173 goto out_unlock;
1119 1174
1120 if (!strcmp(strstrip(filter_string), "0")) { 1175 if (!strcmp(strstrip(filter_string), "0")) {
1121 filter_free_subsystem_preds(system); 1176 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
1122 remove_filter_string(system->filter); 1177 remove_filter_string(system->filter);
1123 mutex_unlock(&filter_mutex); 1178 mutex_unlock(&event_mutex);
1124 return 0; 1179 return 0;
1125 } 1180 }
1126 1181
@@ -1129,7 +1184,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1129 if (!ps) 1184 if (!ps)
1130 goto out_unlock; 1185 goto out_unlock;
1131 1186
1132 filter_free_subsystem_preds(system);
1133 replace_filter_string(system->filter, filter_string); 1187 replace_filter_string(system->filter, filter_string);
1134 1188
1135 parse_init(ps, filter_ops, filter_string); 1189 parse_init(ps, filter_ops, filter_string);
@@ -1139,16 +1193,30 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1139 goto out; 1193 goto out;
1140 } 1194 }
1141 1195
1142 err = replace_preds(system, NULL, ps, filter_string); 1196 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
1143 if (err) 1197
1198 /* try to see the filter can be applied to which events */
1199 err = replace_preds(system, NULL, ps, filter_string, true);
1200 if (err) {
1144 append_filter_err(ps, system->filter); 1201 append_filter_err(ps, system->filter);
1202 goto out;
1203 }
1204
1205 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
1206
1207 /* really apply the filter to the events */
1208 err = replace_preds(system, NULL, ps, filter_string, false);
1209 if (err) {
1210 append_filter_err(ps, system->filter);
1211 filter_free_subsystem_preds(system, 2);
1212 }
1145 1213
1146out: 1214out:
1147 filter_opstack_clear(ps); 1215 filter_opstack_clear(ps);
1148 postfix_clear(ps); 1216 postfix_clear(ps);
1149 kfree(ps); 1217 kfree(ps);
1150out_unlock: 1218out_unlock:
1151 mutex_unlock(&filter_mutex); 1219 mutex_unlock(&event_mutex);
1152 1220
1153 return err; 1221 return err;
1154} 1222}
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d06cf898dc86..df1bf6e48bb9 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -60,7 +60,8 @@ extern void __bad_type_size(void);
60#undef TRACE_EVENT_FORMAT 60#undef TRACE_EVENT_FORMAT
61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
62static int \ 62static int \
63ftrace_format_##call(struct trace_seq *s) \ 63ftrace_format_##call(struct ftrace_event_call *unused, \
64 struct trace_seq *s) \
64{ \ 65{ \
65 struct args field; \ 66 struct args field; \
66 int ret; \ 67 int ret; \
@@ -76,7 +77,8 @@ ftrace_format_##call(struct trace_seq *s) \
76#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 77#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
77 tpfmt) \ 78 tpfmt) \
78static int \ 79static int \
79ftrace_format_##call(struct trace_seq *s) \ 80ftrace_format_##call(struct ftrace_event_call *unused, \
81 struct trace_seq *s) \
80{ \ 82{ \
81 struct args field; \ 83 struct args field; \
82 int ret; \ 84 int ret; \
@@ -117,7 +119,7 @@ ftrace_format_##call(struct trace_seq *s) \
117 119
118#undef TRACE_EVENT_FORMAT 120#undef TRACE_EVENT_FORMAT
119#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 121#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
120int ftrace_define_fields_##call(void); \ 122int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \
121static int ftrace_raw_init_event_##call(void); \ 123static int ftrace_raw_init_event_##call(void); \
122 \ 124 \
123struct ftrace_event_call __used \ 125struct ftrace_event_call __used \
@@ -133,7 +135,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
133static int ftrace_raw_init_event_##call(void) \ 135static int ftrace_raw_init_event_##call(void) \
134{ \ 136{ \
135 INIT_LIST_HEAD(&event_##call.fields); \ 137 INIT_LIST_HEAD(&event_##call.fields); \
136 init_preds(&event_##call); \
137 return 0; \ 138 return 0; \
138} \ 139} \
139 140
@@ -156,7 +157,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
156#define TRACE_FIELD(type, item, assign) \ 157#define TRACE_FIELD(type, item, assign) \
157 ret = trace_define_field(event_call, #type, #item, \ 158 ret = trace_define_field(event_call, #type, #item, \
158 offsetof(typeof(field), item), \ 159 offsetof(typeof(field), item), \
159 sizeof(field.item), is_signed_type(type)); \ 160 sizeof(field.item), \
161 is_signed_type(type), FILTER_OTHER); \
160 if (ret) \ 162 if (ret) \
161 return ret; 163 return ret;
162 164
@@ -164,7 +166,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
164#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ 166#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
165 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 167 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
166 offsetof(typeof(field), item), \ 168 offsetof(typeof(field), item), \
167 sizeof(field.item), 0); \ 169 sizeof(field.item), 0, FILTER_OTHER); \
168 if (ret) \ 170 if (ret) \
169 return ret; 171 return ret;
170 172
@@ -172,7 +174,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
172#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 174#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
173 ret = trace_define_field(event_call, #type, #item, \ 175 ret = trace_define_field(event_call, #type, #item, \
174 offsetof(typeof(field), item), \ 176 offsetof(typeof(field), item), \
175 sizeof(field.item), is_signed); \ 177 sizeof(field.item), is_signed, \
178 FILTER_OTHER); \
176 if (ret) \ 179 if (ret) \
177 return ret; 180 return ret;
178 181
@@ -182,17 +185,14 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
182#undef TRACE_EVENT_FORMAT 185#undef TRACE_EVENT_FORMAT
183#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 186#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
184int \ 187int \
185ftrace_define_fields_##call(void) \ 188ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
186{ \ 189{ \
187 struct ftrace_event_call *event_call = &event_##call; \
188 struct args field; \ 190 struct args field; \
189 int ret; \ 191 int ret; \
190 \ 192 \
191 __common_field(unsigned char, type, 0); \ 193 ret = trace_define_common_fields(event_call); \
192 __common_field(unsigned char, flags, 0); \ 194 if (ret) \
193 __common_field(unsigned char, preempt_count, 0); \ 195 return ret; \
194 __common_field(int, pid, 1); \
195 __common_field(int, tgid, 1); \
196 \ 196 \
197 tstruct; \ 197 tstruct; \
198 \ 198 \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c9a0b7df44ff..5b01b94518fc 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void)
193static void tracing_stop_function_trace(void) 193static void tracing_stop_function_trace(void)
194{ 194{
195 ftrace_function_enabled = 0; 195 ftrace_function_enabled = 0;
196 /* OK if they are not registered */ 196
197 unregister_ftrace_function(&trace_stack_ops); 197 if (func_flags.val & TRACE_FUNC_OPT_STACK)
198 unregister_ftrace_function(&trace_ops); 198 unregister_ftrace_function(&trace_stack_ops);
199 else
200 unregister_ftrace_function(&trace_ops);
199} 201}
200 202
201static int func_set_flag(u32 old_flags, u32 bit, int set) 203static int func_set_flag(u32 old_flags, u32 bit, int set)
@@ -286,11 +288,9 @@ static int
286ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, 288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
287 struct ftrace_probe_ops *ops, void *data) 289 struct ftrace_probe_ops *ops, void *data)
288{ 290{
289 char str[KSYM_SYMBOL_LEN];
290 long count = (long)data; 291 long count = (long)data;
291 292
292 kallsyms_lookup(ip, NULL, NULL, NULL, str); 293 seq_printf(m, "%pf:", (void *)ip);
293 seq_printf(m, "%s:", str);
294 294
295 if (ops == &traceon_probe_ops) 295 if (ops == &traceon_probe_ops)
296 seq_printf(m, "traceon"); 296 seq_printf(m, "traceon");
@@ -300,8 +300,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
300 if (count == -1) 300 if (count == -1)
301 seq_printf(m, ":unlimited\n"); 301 seq_printf(m, ":unlimited\n");
302 else 302 else
303 seq_printf(m, ":count=%ld", count); 303 seq_printf(m, ":count=%ld\n", count);
304 seq_putc(m, '\n');
305 304
306 return 0; 305 return 0;
307} 306}
@@ -362,7 +361,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
362 out_reg: 361 out_reg:
363 ret = register_ftrace_function_probe(glob, ops, count); 362 ret = register_ftrace_function_probe(glob, ops, count);
364 363
365 return ret; 364 return ret < 0 ? ret : 0;
366} 365}
367 366
368static struct ftrace_func_command ftrace_traceon_cmd = { 367static struct ftrace_func_command ftrace_traceon_cmd = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b592418d8b2..b3749a2c3132 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -52,12 +52,13 @@ static struct tracer_flags tracer_flags = {
52 .opts = trace_opts 52 .opts = trace_opts
53}; 53};
54 54
55/* pid on the last trace processed */ 55static struct trace_array *graph_array;
56 56
57 57
58/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
59int 59int
60ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) 60ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
61 unsigned long frame_pointer)
61{ 62{
62 unsigned long long calltime; 63 unsigned long long calltime;
63 int index; 64 int index;
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
85 current->ret_stack[index].func = func; 86 current->ret_stack[index].func = func;
86 current->ret_stack[index].calltime = calltime; 87 current->ret_stack[index].calltime = calltime;
87 current->ret_stack[index].subtime = 0; 88 current->ret_stack[index].subtime = 0;
89 current->ret_stack[index].fp = frame_pointer;
88 *depth = index; 90 *depth = index;
89 91
90 return 0; 92 return 0;
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
92 94
93/* Retrieve a function return address to the trace stack on thread info.*/ 95/* Retrieve a function return address to the trace stack on thread info.*/
94static void 96static void
95ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) 97ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
98 unsigned long frame_pointer)
96{ 99{
97 int index; 100 int index;
98 101
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
106 return; 109 return;
107 } 110 }
108 111
112#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
113 /*
114 * The arch may choose to record the frame pointer used
115 * and check it here to make sure that it is what we expect it
116 * to be. If gcc does not set the place holder of the return
117 * address in the frame pointer, and does a copy instead, then
118 * the function graph trace will fail. This test detects this
119 * case.
120 *
121 * Currently, x86_32 with optimize for size (-Os) makes the latest
122 * gcc do the above.
123 */
124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
125 ftrace_graph_stop();
126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
127 " from func %pF return to %lx\n",
128 current->ret_stack[index].fp,
129 frame_pointer,
130 (void *)current->ret_stack[index].func,
131 current->ret_stack[index].ret);
132 *ret = (unsigned long)panic;
133 return;
134 }
135#endif
136
109 *ret = current->ret_stack[index].ret; 137 *ret = current->ret_stack[index].ret;
110 trace->func = current->ret_stack[index].func; 138 trace->func = current->ret_stack[index].func;
111 trace->calltime = current->ret_stack[index].calltime; 139 trace->calltime = current->ret_stack[index].calltime;
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
117 * Send the trace to the ring-buffer. 145 * Send the trace to the ring-buffer.
118 * @return the original return address. 146 * @return the original return address.
119 */ 147 */
120unsigned long ftrace_return_to_handler(void) 148unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
121{ 149{
122 struct ftrace_graph_ret trace; 150 struct ftrace_graph_ret trace;
123 unsigned long ret; 151 unsigned long ret;
124 152
125 ftrace_pop_return_trace(&trace, &ret); 153 ftrace_pop_return_trace(&trace, &ret, frame_pointer);
126 trace.rettime = trace_clock_local(); 154 trace.rettime = trace_clock_local();
127 ftrace_graph_return(&trace); 155 ftrace_graph_return(&trace);
128 barrier(); 156 barrier();
@@ -138,10 +166,123 @@ unsigned long ftrace_return_to_handler(void)
138 return ret; 166 return ret;
139} 167}
140 168
169static int __trace_graph_entry(struct trace_array *tr,
170 struct ftrace_graph_ent *trace,
171 unsigned long flags,
172 int pc)
173{
174 struct ftrace_event_call *call = &event_funcgraph_entry;
175 struct ring_buffer_event *event;
176 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry;
178
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
180 return 0;
181
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
183 sizeof(*entry), flags, pc);
184 if (!event)
185 return 0;
186 entry = ring_buffer_event_data(event);
187 entry->graph_ent = *trace;
188 if (!filter_current_check_discard(buffer, call, entry, event))
189 ring_buffer_unlock_commit(buffer, event);
190
191 return 1;
192}
193
194int trace_graph_entry(struct ftrace_graph_ent *trace)
195{
196 struct trace_array *tr = graph_array;
197 struct trace_array_cpu *data;
198 unsigned long flags;
199 long disabled;
200 int ret;
201 int cpu;
202 int pc;
203
204 if (unlikely(!tr))
205 return 0;
206
207 if (!ftrace_trace_task(current))
208 return 0;
209
210 if (!ftrace_graph_addr(trace->func))
211 return 0;
212
213 local_irq_save(flags);
214 cpu = raw_smp_processor_id();
215 data = tr->data[cpu];
216 disabled = atomic_inc_return(&data->disabled);
217 if (likely(disabled == 1)) {
218 pc = preempt_count();
219 ret = __trace_graph_entry(tr, trace, flags, pc);
220 } else {
221 ret = 0;
222 }
223 /* Only do the atomic if it is not already set */
224 if (!test_tsk_trace_graph(current))
225 set_tsk_trace_graph(current);
226
227 atomic_dec(&data->disabled);
228 local_irq_restore(flags);
229
230 return ret;
231}
232
233static void __trace_graph_return(struct trace_array *tr,
234 struct ftrace_graph_ret *trace,
235 unsigned long flags,
236 int pc)
237{
238 struct ftrace_event_call *call = &event_funcgraph_exit;
239 struct ring_buffer_event *event;
240 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry;
242
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
244 return;
245
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
247 sizeof(*entry), flags, pc);
248 if (!event)
249 return;
250 entry = ring_buffer_event_data(event);
251 entry->ret = *trace;
252 if (!filter_current_check_discard(buffer, call, entry, event))
253 ring_buffer_unlock_commit(buffer, event);
254}
255
256void trace_graph_return(struct ftrace_graph_ret *trace)
257{
258 struct trace_array *tr = graph_array;
259 struct trace_array_cpu *data;
260 unsigned long flags;
261 long disabled;
262 int cpu;
263 int pc;
264
265 local_irq_save(flags);
266 cpu = raw_smp_processor_id();
267 data = tr->data[cpu];
268 disabled = atomic_inc_return(&data->disabled);
269 if (likely(disabled == 1)) {
270 pc = preempt_count();
271 __trace_graph_return(tr, trace, flags, pc);
272 }
273 if (!trace->depth)
274 clear_tsk_trace_graph(current);
275 atomic_dec(&data->disabled);
276 local_irq_restore(flags);
277}
278
141static int graph_trace_init(struct trace_array *tr) 279static int graph_trace_init(struct trace_array *tr)
142{ 280{
143 int ret = register_ftrace_graph(&trace_graph_return, 281 int ret;
144 &trace_graph_entry); 282
283 graph_array = tr;
284 ret = register_ftrace_graph(&trace_graph_return,
285 &trace_graph_entry);
145 if (ret) 286 if (ret)
146 return ret; 287 return ret;
147 tracing_start_cmdline_record(); 288 tracing_start_cmdline_record();
@@ -149,49 +290,30 @@ static int graph_trace_init(struct trace_array *tr)
149 return 0; 290 return 0;
150} 291}
151 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296}
297
152static void graph_trace_reset(struct trace_array *tr) 298static void graph_trace_reset(struct trace_array *tr)
153{ 299{
154 tracing_stop_cmdline_record(); 300 tracing_stop_cmdline_record();
155 unregister_ftrace_graph(); 301 unregister_ftrace_graph();
156} 302}
157 303
158static inline int log10_cpu(int nb) 304static int max_bytes_for_cpu;
159{
160 if (nb / 100)
161 return 3;
162 if (nb / 10)
163 return 2;
164 return 1;
165}
166 305
167static enum print_line_t 306static enum print_line_t
168print_graph_cpu(struct trace_seq *s, int cpu) 307print_graph_cpu(struct trace_seq *s, int cpu)
169{ 308{
170 int i;
171 int ret; 309 int ret;
172 int log10_this = log10_cpu(cpu);
173 int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
174
175 310
176 /* 311 /*
177 * Start with a space character - to make it stand out 312 * Start with a space character - to make it stand out
178 * to the right a bit when trace output is pasted into 313 * to the right a bit when trace output is pasted into
179 * email: 314 * email:
180 */ 315 */
181 ret = trace_seq_printf(s, " "); 316 ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
182
183 /*
184 * Tricky - we space the CPU field according to the max
185 * number of online CPUs. On a 2-cpu system it would take
186 * a maximum of 1 digit - on a 128 cpu system it would
187 * take up to 3 digits:
188 */
189 for (i = 0; i < log10_all - log10_this; i++) {
190 ret = trace_seq_printf(s, " ");
191 if (!ret)
192 return TRACE_TYPE_PARTIAL_LINE;
193 }
194 ret = trace_seq_printf(s, "%d) ", cpu);
195 if (!ret) 317 if (!ret)
196 return TRACE_TYPE_PARTIAL_LINE; 318 return TRACE_TYPE_PARTIAL_LINE;
197 319
@@ -537,11 +659,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
537 return TRACE_TYPE_PARTIAL_LINE; 659 return TRACE_TYPE_PARTIAL_LINE;
538 } 660 }
539 661
540 ret = seq_print_ip_sym(s, call->func, 0); 662 ret = trace_seq_printf(s, "%pf();\n", (void *)call->func);
541 if (!ret)
542 return TRACE_TYPE_PARTIAL_LINE;
543
544 ret = trace_seq_printf(s, "();\n");
545 if (!ret) 663 if (!ret)
546 return TRACE_TYPE_PARTIAL_LINE; 664 return TRACE_TYPE_PARTIAL_LINE;
547 665
@@ -584,11 +702,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
584 return TRACE_TYPE_PARTIAL_LINE; 702 return TRACE_TYPE_PARTIAL_LINE;
585 } 703 }
586 704
587 ret = seq_print_ip_sym(s, call->func, 0); 705 ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func);
588 if (!ret)
589 return TRACE_TYPE_PARTIAL_LINE;
590
591 ret = trace_seq_printf(s, "() {\n");
592 if (!ret) 706 if (!ret)
593 return TRACE_TYPE_PARTIAL_LINE; 707 return TRACE_TYPE_PARTIAL_LINE;
594 708
@@ -815,9 +929,16 @@ print_graph_function(struct trace_iterator *iter)
815 929
816 switch (entry->type) { 930 switch (entry->type) {
817 case TRACE_GRAPH_ENT: { 931 case TRACE_GRAPH_ENT: {
818 struct ftrace_graph_ent_entry *field; 932 /*
933 * print_graph_entry() may consume the current event,
934 * thus @field may become invalid, so we need to save it.
935 * sizeof(struct ftrace_graph_ent_entry) is very small,
936 * it can be safely saved at the stack.
937 */
938 struct ftrace_graph_ent_entry *field, saved;
819 trace_assign_type(field, entry); 939 trace_assign_type(field, entry);
820 return print_graph_entry(field, s, iter); 940 saved = *field;
941 return print_graph_entry(&saved, s, iter);
821 } 942 }
822 case TRACE_GRAPH_RET: { 943 case TRACE_GRAPH_RET: {
823 struct ftrace_graph_ret_entry *field; 944 struct ftrace_graph_ret_entry *field;
@@ -899,6 +1020,8 @@ static struct tracer graph_trace __read_mostly = {
899 1020
900static __init int init_graph_trace(void) 1021static __init int init_graph_trace(void)
901{ 1022{
1023 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1024
902 return register_tracer(&graph_trace); 1025 return register_tracer(&graph_trace);
903} 1026}
904 1027
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b923d13e2fad..5555b75a0d12 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -178,7 +178,6 @@ out_unlock:
178out: 178out:
179 data->critical_sequence = max_sequence; 179 data->critical_sequence = max_sequence;
180 data->preempt_timestamp = ftrace_now(cpu); 180 data->preempt_timestamp = ftrace_now(cpu);
181 tracing_reset(tr, cpu);
182 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 181 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
183} 182}
184 183
@@ -208,7 +207,6 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
208 data->critical_sequence = max_sequence; 207 data->critical_sequence = max_sequence;
209 data->preempt_timestamp = ftrace_now(cpu); 208 data->preempt_timestamp = ftrace_now(cpu);
210 data->critical_start = parent_ip ? : ip; 209 data->critical_start = parent_ip ? : ip;
211 tracing_reset(tr, cpu);
212 210
213 local_save_flags(flags); 211 local_save_flags(flags);
214 212
@@ -379,6 +377,7 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
379 irqsoff_trace = tr; 377 irqsoff_trace = tr;
380 /* make sure that the tracer is visible */ 378 /* make sure that the tracer is visible */
381 smp_wmb(); 379 smp_wmb();
380 tracing_reset_online_cpus(tr);
382 start_irqsoff_tracer(tr); 381 start_irqsoff_tracer(tr);
383} 382}
384 383
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index d53b45ed0806..c4c9bbda53d3 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -307,11 +307,12 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
307 struct trace_array_cpu *data, 307 struct trace_array_cpu *data,
308 struct mmiotrace_rw *rw) 308 struct mmiotrace_rw *rw)
309{ 309{
310 struct ring_buffer *buffer = tr->buffer;
310 struct ring_buffer_event *event; 311 struct ring_buffer_event *event;
311 struct trace_mmiotrace_rw *entry; 312 struct trace_mmiotrace_rw *entry;
312 int pc = preempt_count(); 313 int pc = preempt_count();
313 314
314 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, 315 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
315 sizeof(*entry), 0, pc); 316 sizeof(*entry), 0, pc);
316 if (!event) { 317 if (!event) {
317 atomic_inc(&dropped_count); 318 atomic_inc(&dropped_count);
@@ -319,7 +320,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
319 } 320 }
320 entry = ring_buffer_event_data(event); 321 entry = ring_buffer_event_data(event);
321 entry->rw = *rw; 322 entry->rw = *rw;
322 trace_buffer_unlock_commit(tr, event, 0, pc); 323 trace_buffer_unlock_commit(buffer, event, 0, pc);
323} 324}
324 325
325void mmio_trace_rw(struct mmiotrace_rw *rw) 326void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -333,11 +334,12 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
333 struct trace_array_cpu *data, 334 struct trace_array_cpu *data,
334 struct mmiotrace_map *map) 335 struct mmiotrace_map *map)
335{ 336{
337 struct ring_buffer *buffer = tr->buffer;
336 struct ring_buffer_event *event; 338 struct ring_buffer_event *event;
337 struct trace_mmiotrace_map *entry; 339 struct trace_mmiotrace_map *entry;
338 int pc = preempt_count(); 340 int pc = preempt_count();
339 341
340 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, 342 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP,
341 sizeof(*entry), 0, pc); 343 sizeof(*entry), 0, pc);
342 if (!event) { 344 if (!event) {
343 atomic_inc(&dropped_count); 345 atomic_inc(&dropped_count);
@@ -345,7 +347,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
345 } 347 }
346 entry = ring_buffer_event_data(event); 348 entry = ring_buffer_event_data(event);
347 entry->map = *map; 349 entry->map = *map;
348 trace_buffer_unlock_commit(tr, event, 0, pc); 350 trace_buffer_unlock_commit(buffer, event, 0, pc);
349} 351}
350 352
351void mmio_trace_mapping(struct mmiotrace_map *map) 353void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e3..e0c2545622e8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 29
30 s->buffer[len] = 0; 30 seq_write(m, s->buffer, len);
31 seq_puts(m, s->buffer);
32 31
33 trace_seq_init(s); 32 trace_seq_init(s);
34} 33}
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 8a30d9874cd4..fe1a00f1445a 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -38,6 +38,7 @@ static void probe_power_end(struct power_trace *it)
38{ 38{
39 struct ftrace_event_call *call = &event_power; 39 struct ftrace_event_call *call = &event_power;
40 struct ring_buffer_event *event; 40 struct ring_buffer_event *event;
41 struct ring_buffer *buffer;
41 struct trace_power *entry; 42 struct trace_power *entry;
42 struct trace_array_cpu *data; 43 struct trace_array_cpu *data;
43 struct trace_array *tr = power_trace; 44 struct trace_array *tr = power_trace;
@@ -45,18 +46,20 @@ static void probe_power_end(struct power_trace *it)
45 if (!trace_power_enabled) 46 if (!trace_power_enabled)
46 return; 47 return;
47 48
49 buffer = tr->buffer;
50
48 preempt_disable(); 51 preempt_disable();
49 it->end = ktime_get(); 52 it->end = ktime_get();
50 data = tr->data[smp_processor_id()]; 53 data = tr->data[smp_processor_id()];
51 54
52 event = trace_buffer_lock_reserve(tr, TRACE_POWER, 55 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
53 sizeof(*entry), 0, 0); 56 sizeof(*entry), 0, 0);
54 if (!event) 57 if (!event)
55 goto out; 58 goto out;
56 entry = ring_buffer_event_data(event); 59 entry = ring_buffer_event_data(event);
57 entry->state_data = *it; 60 entry->state_data = *it;
58 if (!filter_check_discard(call, entry, tr->buffer, event)) 61 if (!filter_check_discard(call, entry, buffer, event))
59 trace_buffer_unlock_commit(tr, event, 0, 0); 62 trace_buffer_unlock_commit(buffer, event, 0, 0);
60 out: 63 out:
61 preempt_enable(); 64 preempt_enable();
62} 65}
@@ -66,6 +69,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
66{ 69{
67 struct ftrace_event_call *call = &event_power; 70 struct ftrace_event_call *call = &event_power;
68 struct ring_buffer_event *event; 71 struct ring_buffer_event *event;
72 struct ring_buffer *buffer;
69 struct trace_power *entry; 73 struct trace_power *entry;
70 struct trace_array_cpu *data; 74 struct trace_array_cpu *data;
71 struct trace_array *tr = power_trace; 75 struct trace_array *tr = power_trace;
@@ -73,6 +77,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
73 if (!trace_power_enabled) 77 if (!trace_power_enabled)
74 return; 78 return;
75 79
80 buffer = tr->buffer;
81
76 memset(it, 0, sizeof(struct power_trace)); 82 memset(it, 0, sizeof(struct power_trace));
77 it->state = level; 83 it->state = level;
78 it->type = type; 84 it->type = type;
@@ -81,14 +87,14 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
81 it->end = it->stamp; 87 it->end = it->stamp;
82 data = tr->data[smp_processor_id()]; 88 data = tr->data[smp_processor_id()];
83 89
84 event = trace_buffer_lock_reserve(tr, TRACE_POWER, 90 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
85 sizeof(*entry), 0, 0); 91 sizeof(*entry), 0, 0);
86 if (!event) 92 if (!event)
87 goto out; 93 goto out;
88 entry = ring_buffer_event_data(event); 94 entry = ring_buffer_event_data(event);
89 entry->state_data = *it; 95 entry->state_data = *it;
90 if (!filter_check_discard(call, entry, tr->buffer, event)) 96 if (!filter_check_discard(call, entry, buffer, event))
91 trace_buffer_unlock_commit(tr, event, 0, 0); 97 trace_buffer_unlock_commit(buffer, event, 0, 0);
92 out: 98 out:
93 preempt_enable(); 99 preempt_enable();
94} 100}
@@ -144,14 +150,12 @@ static void power_trace_reset(struct trace_array *tr)
144 150
145static int power_trace_init(struct trace_array *tr) 151static int power_trace_init(struct trace_array *tr)
146{ 152{
147 int cpu;
148 power_trace = tr; 153 power_trace = tr;
149 154
150 trace_power_enabled = 1; 155 trace_power_enabled = 1;
151 tracing_power_register(); 156 tracing_power_register();
152 157
153 for_each_cpu(cpu, cpu_possible_mask) 158 tracing_reset_online_cpus(tr);
154 tracing_reset(tr, cpu);
155 return 0; 159 return 0;
156} 160}
157 161
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9bece9687b62..687699d365ae 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -155,25 +155,19 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
155EXPORT_SYMBOL_GPL(__ftrace_vprintk); 155EXPORT_SYMBOL_GPL(__ftrace_vprintk);
156 156
157static void * 157static void *
158t_next(struct seq_file *m, void *v, loff_t *pos) 158t_start(struct seq_file *m, loff_t *pos)
159{ 159{
160 const char **fmt = m->private; 160 const char **fmt = __start___trace_bprintk_fmt + *pos;
161 const char **next = fmt;
162
163 (*pos)++;
164 161
165 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) 162 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
166 return NULL; 163 return NULL;
167
168 next = fmt;
169 m->private = ++next;
170
171 return fmt; 164 return fmt;
172} 165}
173 166
174static void *t_start(struct seq_file *m, loff_t *pos) 167static void *t_next(struct seq_file *m, void * v, loff_t *pos)
175{ 168{
176 return t_next(m, NULL, pos); 169 (*pos)++;
170 return t_start(m, pos);
177} 171}
178 172
179static int t_show(struct seq_file *m, void *v) 173static int t_show(struct seq_file *m, void *v)
@@ -182,7 +176,7 @@ static int t_show(struct seq_file *m, void *v)
182 const char *str = *fmt; 176 const char *str = *fmt;
183 int i; 177 int i;
184 178
185 seq_printf(m, "0x%lx : \"", (unsigned long)fmt); 179 seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
186 180
187 /* 181 /*
188 * Tabs and new lines need to be converted. 182 * Tabs and new lines need to be converted.
@@ -224,15 +218,7 @@ static const struct seq_operations show_format_seq_ops = {
224static int 218static int
225ftrace_formats_open(struct inode *inode, struct file *file) 219ftrace_formats_open(struct inode *inode, struct file *file)
226{ 220{
227 int ret; 221 return seq_open(file, &show_format_seq_ops);
228
229 ret = seq_open(file, &show_format_seq_ops);
230 if (!ret) {
231 struct seq_file *m = file->private_data;
232
233 m->private = __start___trace_bprintk_fmt;
234 }
235 return ret;
236} 222}
237 223
238static const struct file_operations ftrace_formats_fops = { 224static const struct file_operations ftrace_formats_fops = {
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a98106dd979c..5fca0f51fde4 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -20,6 +20,35 @@ static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped; 21static int sched_stopped;
22 22
23
24void
25tracing_sched_switch_trace(struct trace_array *tr,
26 struct task_struct *prev,
27 struct task_struct *next,
28 unsigned long flags, int pc)
29{
30 struct ftrace_event_call *call = &event_context_switch;
31 struct ring_buffer *buffer = tr->buffer;
32 struct ring_buffer_event *event;
33 struct ctx_switch_entry *entry;
34
35 event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
36 sizeof(*entry), flags, pc);
37 if (!event)
38 return;
39 entry = ring_buffer_event_data(event);
40 entry->prev_pid = prev->pid;
41 entry->prev_prio = prev->prio;
42 entry->prev_state = prev->state;
43 entry->next_pid = next->pid;
44 entry->next_prio = next->prio;
45 entry->next_state = next->state;
46 entry->next_cpu = task_cpu(next);
47
48 if (!filter_check_discard(call, entry, buffer, event))
49 trace_buffer_unlock_commit(buffer, event, flags, pc);
50}
51
23static void 52static void
24probe_sched_switch(struct rq *__rq, struct task_struct *prev, 53probe_sched_switch(struct rq *__rq, struct task_struct *prev,
25 struct task_struct *next) 54 struct task_struct *next)
@@ -49,6 +78,36 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
49 local_irq_restore(flags); 78 local_irq_restore(flags);
50} 79}
51 80
81void
82tracing_sched_wakeup_trace(struct trace_array *tr,
83 struct task_struct *wakee,
84 struct task_struct *curr,
85 unsigned long flags, int pc)
86{
87 struct ftrace_event_call *call = &event_wakeup;
88 struct ring_buffer_event *event;
89 struct ctx_switch_entry *entry;
90 struct ring_buffer *buffer = tr->buffer;
91
92 event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
93 sizeof(*entry), flags, pc);
94 if (!event)
95 return;
96 entry = ring_buffer_event_data(event);
97 entry->prev_pid = curr->pid;
98 entry->prev_prio = curr->prio;
99 entry->prev_state = curr->state;
100 entry->next_pid = wakee->pid;
101 entry->next_prio = wakee->prio;
102 entry->next_state = wakee->state;
103 entry->next_cpu = task_cpu(wakee);
104
105 if (!filter_check_discard(call, entry, buffer, event))
106 ring_buffer_unlock_commit(buffer, event);
107 ftrace_trace_stack(tr->buffer, flags, 6, pc);
108 ftrace_trace_userstack(tr->buffer, flags, pc);
109}
110
52static void 111static void
53probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 112probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
54{ 113{
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index eacb27225173..ad69f105a7c6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -186,11 +186,6 @@ out:
186 186
187static void __wakeup_reset(struct trace_array *tr) 187static void __wakeup_reset(struct trace_array *tr)
188{ 188{
189 int cpu;
190
191 for_each_possible_cpu(cpu)
192 tracing_reset(tr, cpu);
193
194 wakeup_cpu = -1; 189 wakeup_cpu = -1;
195 wakeup_prio = -1; 190 wakeup_prio = -1;
196 191
@@ -204,6 +199,8 @@ static void wakeup_reset(struct trace_array *tr)
204{ 199{
205 unsigned long flags; 200 unsigned long flags;
206 201
202 tracing_reset_online_cpus(tr);
203
207 local_irq_save(flags); 204 local_irq_save(flags);
208 __raw_spin_lock(&wakeup_lock); 205 __raw_spin_lock(&wakeup_lock);
209 __wakeup_reset(tr); 206 __wakeup_reset(tr);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 71f2edb0fd84..7179c12e4f0f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -289,6 +289,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
289 * to detect and recover from possible hangs 289 * to detect and recover from possible hangs
290 */ 290 */
291 tracing_reset_online_cpus(tr); 291 tracing_reset_online_cpus(tr);
292 set_graph_array(tr);
292 ret = register_ftrace_graph(&trace_graph_return, 293 ret = register_ftrace_graph(&trace_graph_return,
293 &trace_graph_entry_watchdog); 294 &trace_graph_entry_watchdog);
294 if (ret) { 295 if (ret) {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71dbd..0f6facb050a1 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -186,43 +186,33 @@ static const struct file_operations stack_max_size_fops = {
186}; 186};
187 187
188static void * 188static void *
189t_next(struct seq_file *m, void *v, loff_t *pos) 189__next(struct seq_file *m, loff_t *pos)
190{ 190{
191 long i; 191 long n = *pos - 1;
192
193 (*pos)++;
194
195 if (v == SEQ_START_TOKEN)
196 i = 0;
197 else {
198 i = *(long *)v;
199 i++;
200 }
201 192
202 if (i >= max_stack_trace.nr_entries || 193 if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
203 stack_dump_trace[i] == ULONG_MAX)
204 return NULL; 194 return NULL;
205 195
206 m->private = (void *)i; 196 m->private = (void *)n;
207
208 return &m->private; 197 return &m->private;
209} 198}
210 199
211static void *t_start(struct seq_file *m, loff_t *pos) 200static void *
201t_next(struct seq_file *m, void *v, loff_t *pos)
212{ 202{
213 void *t = SEQ_START_TOKEN; 203 (*pos)++;
214 loff_t l = 0; 204 return __next(m, pos);
205}
215 206
207static void *t_start(struct seq_file *m, loff_t *pos)
208{
216 local_irq_disable(); 209 local_irq_disable();
217 __raw_spin_lock(&max_stack_lock); 210 __raw_spin_lock(&max_stack_lock);
218 211
219 if (*pos == 0) 212 if (*pos == 0)
220 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
221 214
222 for (; t && l < *pos; t = t_next(m, t, &l)) 215 return __next(m, pos);
223 ;
224
225 return t;
226} 216}
227 217
228static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
@@ -234,15 +224,8 @@ static void t_stop(struct seq_file *m, void *p)
234static int trace_lookup_stack(struct seq_file *m, long i) 224static int trace_lookup_stack(struct seq_file *m, long i)
235{ 225{
236 unsigned long addr = stack_dump_trace[i]; 226 unsigned long addr = stack_dump_trace[i];
237#ifdef CONFIG_KALLSYMS
238 char str[KSYM_SYMBOL_LEN];
239 227
240 sprint_symbol(str, addr); 228 return seq_printf(m, "%pF\n", (void *)addr);
241
242 return seq_printf(m, "%s\n", str);
243#else
244 return seq_printf(m, "%p\n", (void*)addr);
245#endif
246} 229}
247 230
248static void print_disabled(struct seq_file *m) 231static void print_disabled(struct seq_file *m)
@@ -301,17 +284,14 @@ static const struct seq_operations stack_trace_seq_ops = {
301 284
302static int stack_trace_open(struct inode *inode, struct file *file) 285static int stack_trace_open(struct inode *inode, struct file *file)
303{ 286{
304 int ret; 287 return seq_open(file, &stack_trace_seq_ops);
305
306 ret = seq_open(file, &stack_trace_seq_ops);
307
308 return ret;
309} 288}
310 289
311static const struct file_operations stack_trace_fops = { 290static const struct file_operations stack_trace_fops = {
312 .open = stack_trace_open, 291 .open = stack_trace_open,
313 .read = seq_read, 292 .read = seq_read,
314 .llseek = seq_lseek, 293 .llseek = seq_lseek,
294 .release = seq_release,
315}; 295};
316 296
317int 297int
@@ -326,10 +306,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 306 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
327 307
328 if (ret || !write || 308 if (ret || !write ||
329 (last_stack_tracer_enabled == stack_tracer_enabled)) 309 (last_stack_tracer_enabled == !!stack_tracer_enabled))
330 goto out; 310 goto out;
331 311
332 last_stack_tracer_enabled = stack_tracer_enabled; 312 last_stack_tracer_enabled = !!stack_tracer_enabled;
333 313
334 if (stack_tracer_enabled) 314 if (stack_tracer_enabled)
335 register_ftrace_function(&trace_ops); 315 register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index c00643733f4c..a4bb239eb987 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -49,7 +49,8 @@ static struct dentry *stat_dir;
49 * but it will at least advance closer to the next one 49 * but it will at least advance closer to the next one
50 * to be released. 50 * to be released.
51 */ 51 */
52static struct rb_node *release_next(struct rb_node *node) 52static struct rb_node *release_next(struct tracer_stat *ts,
53 struct rb_node *node)
53{ 54{
54 struct stat_node *snode; 55 struct stat_node *snode;
55 struct rb_node *parent = rb_parent(node); 56 struct rb_node *parent = rb_parent(node);
@@ -67,26 +68,35 @@ static struct rb_node *release_next(struct rb_node *node)
67 parent->rb_right = NULL; 68 parent->rb_right = NULL;
68 69
69 snode = container_of(node, struct stat_node, node); 70 snode = container_of(node, struct stat_node, node);
71 if (ts->stat_release)
72 ts->stat_release(snode->stat);
70 kfree(snode); 73 kfree(snode);
71 74
72 return parent; 75 return parent;
73 } 76 }
74} 77}
75 78
76static void reset_stat_session(struct stat_session *session) 79static void __reset_stat_session(struct stat_session *session)
77{ 80{
78 struct rb_node *node = session->stat_root.rb_node; 81 struct rb_node *node = session->stat_root.rb_node;
79 82
80 while (node) 83 while (node)
81 node = release_next(node); 84 node = release_next(session->ts, node);
82 85
83 session->stat_root = RB_ROOT; 86 session->stat_root = RB_ROOT;
84} 87}
85 88
89static void reset_stat_session(struct stat_session *session)
90{
91 mutex_lock(&session->stat_mutex);
92 __reset_stat_session(session);
93 mutex_unlock(&session->stat_mutex);
94}
95
86static void destroy_session(struct stat_session *session) 96static void destroy_session(struct stat_session *session)
87{ 97{
88 debugfs_remove(session->file); 98 debugfs_remove(session->file);
89 reset_stat_session(session); 99 __reset_stat_session(session);
90 mutex_destroy(&session->stat_mutex); 100 mutex_destroy(&session->stat_mutex);
91 kfree(session); 101 kfree(session);
92} 102}
@@ -150,7 +160,7 @@ static int stat_seq_init(struct stat_session *session)
150 int i; 160 int i;
151 161
152 mutex_lock(&session->stat_mutex); 162 mutex_lock(&session->stat_mutex);
153 reset_stat_session(session); 163 __reset_stat_session(session);
154 164
155 if (!ts->stat_cmp) 165 if (!ts->stat_cmp)
156 ts->stat_cmp = dummy_cmp; 166 ts->stat_cmp = dummy_cmp;
@@ -183,7 +193,7 @@ exit:
183 return ret; 193 return ret;
184 194
185exit_free_rbtree: 195exit_free_rbtree:
186 reset_stat_session(session); 196 __reset_stat_session(session);
187 mutex_unlock(&session->stat_mutex); 197 mutex_unlock(&session->stat_mutex);
188 return ret; 198 return ret;
189} 199}
@@ -193,23 +203,23 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
193{ 203{
194 struct stat_session *session = s->private; 204 struct stat_session *session = s->private;
195 struct rb_node *node; 205 struct rb_node *node;
206 int n = *pos;
196 int i; 207 int i;
197 208
198 /* Prevent from tracer switch or rbtree modification */ 209 /* Prevent from tracer switch or rbtree modification */
199 mutex_lock(&session->stat_mutex); 210 mutex_lock(&session->stat_mutex);
200 211
201 /* If we are in the beginning of the file, print the headers */ 212 /* If we are in the beginning of the file, print the headers */
202 if (!*pos && session->ts->stat_headers) { 213 if (session->ts->stat_headers) {
203 (*pos)++; 214 if (n == 0)
204 return SEQ_START_TOKEN; 215 return SEQ_START_TOKEN;
216 n--;
205 } 217 }
206 218
207 node = rb_first(&session->stat_root); 219 node = rb_first(&session->stat_root);
208 for (i = 0; node && i < *pos; i++) 220 for (i = 0; node && i < n; i++)
209 node = rb_next(node); 221 node = rb_next(node);
210 222
211 (*pos)++;
212
213 return node; 223 return node;
214} 224}
215 225
@@ -254,16 +264,21 @@ static const struct seq_operations trace_stat_seq_ops = {
254static int tracing_stat_open(struct inode *inode, struct file *file) 264static int tracing_stat_open(struct inode *inode, struct file *file)
255{ 265{
256 int ret; 266 int ret;
257 267 struct seq_file *m;
258 struct stat_session *session = inode->i_private; 268 struct stat_session *session = inode->i_private;
259 269
270 ret = stat_seq_init(session);
271 if (ret)
272 return ret;
273
260 ret = seq_open(file, &trace_stat_seq_ops); 274 ret = seq_open(file, &trace_stat_seq_ops);
261 if (!ret) { 275 if (ret) {
262 struct seq_file *m = file->private_data; 276 reset_stat_session(session);
263 m->private = session; 277 return ret;
264 ret = stat_seq_init(session);
265 } 278 }
266 279
280 m = file->private_data;
281 m->private = session;
267 return ret; 282 return ret;
268} 283}
269 284
@@ -274,11 +289,9 @@ static int tracing_stat_release(struct inode *i, struct file *f)
274{ 289{
275 struct stat_session *session = i->i_private; 290 struct stat_session *session = i->i_private;
276 291
277 mutex_lock(&session->stat_mutex);
278 reset_stat_session(session); 292 reset_stat_session(session);
279 mutex_unlock(&session->stat_mutex);
280 293
281 return 0; 294 return seq_release(i, f);
282} 295}
283 296
284static const struct file_operations tracing_stat_fops = { 297static const struct file_operations tracing_stat_fops = {
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index f3546a2cd826..8f03914b9a6a 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -18,6 +18,8 @@ struct tracer_stat {
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */ 19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p); 20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Release an entry */
22 void (*stat_release)(void *stat);
21 /* Print the headers of your stat entries */ 23 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s); 24 int (*stat_headers)(struct seq_file *s);
23}; 25};
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e579645ac86..8712ce3c6a0e 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,30 +1,18 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h>
2#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/ftrace.h>
5#include <linux/perf_counter.h>
3#include <asm/syscall.h> 6#include <asm/syscall.h>
4 7
5#include "trace_output.h" 8#include "trace_output.h"
6#include "trace.h" 9#include "trace.h"
7 10
8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Prevent from races on thread flags toggling */
12static DEFINE_MUTEX(syscall_trace_lock); 11static DEFINE_MUTEX(syscall_trace_lock);
13 12static int sys_refcount_enter;
14/* Option to display the parameters types */ 13static int sys_refcount_exit;
15enum { 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16 TRACE_SYSCALLS_OPT_TYPES = 0x1, 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
17};
18
19static struct tracer_opt syscalls_opts[] = {
20 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
21 { }
22};
23
24static struct tracer_flags syscalls_flags = {
25 .val = 0, /* By default: no parameters types */
26 .opts = syscalls_opts
27};
28 16
29enum print_line_t 17enum print_line_t
30print_syscall_enter(struct trace_iterator *iter, int flags) 18print_syscall_enter(struct trace_iterator *iter, int flags)
@@ -35,35 +23,46 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
35 struct syscall_metadata *entry; 23 struct syscall_metadata *entry;
36 int i, ret, syscall; 24 int i, ret, syscall;
37 25
38 trace_assign_type(trace, ent); 26 trace = (typeof(trace))ent;
39
40 syscall = trace->nr; 27 syscall = trace->nr;
41
42 entry = syscall_nr_to_meta(syscall); 28 entry = syscall_nr_to_meta(syscall);
29
43 if (!entry) 30 if (!entry)
44 goto end; 31 goto end;
45 32
33 if (entry->enter_id != ent->type) {
34 WARN_ON_ONCE(1);
35 goto end;
36 }
37
46 ret = trace_seq_printf(s, "%s(", entry->name); 38 ret = trace_seq_printf(s, "%s(", entry->name);
47 if (!ret) 39 if (!ret)
48 return TRACE_TYPE_PARTIAL_LINE; 40 return TRACE_TYPE_PARTIAL_LINE;
49 41
50 for (i = 0; i < entry->nb_args; i++) { 42 for (i = 0; i < entry->nb_args; i++) {
51 /* parameter types */ 43 /* parameter types */
52 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { 44 if (trace_flags & TRACE_ITER_VERBOSE) {
53 ret = trace_seq_printf(s, "%s ", entry->types[i]); 45 ret = trace_seq_printf(s, "%s ", entry->types[i]);
54 if (!ret) 46 if (!ret)
55 return TRACE_TYPE_PARTIAL_LINE; 47 return TRACE_TYPE_PARTIAL_LINE;
56 } 48 }
57 /* parameter values */ 49 /* parameter values */
58 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], 50 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
59 trace->args[i], 51 trace->args[i],
60 i == entry->nb_args - 1 ? ")" : ","); 52 i == entry->nb_args - 1 ? "" : ", ");
61 if (!ret) 53 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE; 54 return TRACE_TYPE_PARTIAL_LINE;
63 } 55 }
64 56
57 ret = trace_seq_putc(s, ')');
58 if (!ret)
59 return TRACE_TYPE_PARTIAL_LINE;
60
65end: 61end:
66 trace_seq_printf(s, "\n"); 62 ret = trace_seq_putc(s, '\n');
63 if (!ret)
64 return TRACE_TYPE_PARTIAL_LINE;
65
67 return TRACE_TYPE_HANDLED; 66 return TRACE_TYPE_HANDLED;
68} 67}
69 68
@@ -77,16 +76,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
77 struct syscall_metadata *entry; 76 struct syscall_metadata *entry;
78 int ret; 77 int ret;
79 78
80 trace_assign_type(trace, ent); 79 trace = (typeof(trace))ent;
81
82 syscall = trace->nr; 80 syscall = trace->nr;
83
84 entry = syscall_nr_to_meta(syscall); 81 entry = syscall_nr_to_meta(syscall);
82
85 if (!entry) { 83 if (!entry) {
86 trace_seq_printf(s, "\n"); 84 trace_seq_printf(s, "\n");
87 return TRACE_TYPE_HANDLED; 85 return TRACE_TYPE_HANDLED;
88 } 86 }
89 87
88 if (entry->exit_id != ent->type) {
89 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED;
91 }
92
90 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 93 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
91 trace->ret); 94 trace->ret);
92 if (!ret) 95 if (!ret)
@@ -95,62 +98,140 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
95 return TRACE_TYPE_HANDLED; 98 return TRACE_TYPE_HANDLED;
96} 99}
97 100
98void start_ftrace_syscalls(void) 101extern char *__bad_type_size(void);
102
103#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
107
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
99{ 109{
100 unsigned long flags; 110 int i;
101 struct task_struct *g, *t; 111 int nr;
112 int ret;
113 struct syscall_metadata *entry;
114 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args);
102 116
103 mutex_lock(&syscall_trace_lock); 117 nr = syscall_name_to_nr(call->data);
118 entry = syscall_nr_to_meta(nr);
104 119
105 /* Don't enable the flag on the tasks twice */ 120 if (!entry)
106 if (++refcount != 1) 121 return 0;
107 goto unlock;
108 122
109 arch_init_ftrace_syscalls(); 123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
110 read_lock_irqsave(&tasklist_lock, flags); 124 SYSCALL_FIELD(int, nr));
125 if (!ret)
126 return 0;
111 127
112 do_each_thread(g, t) { 128 for (i = 0; i < entry->nb_args; i++) {
113 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 129 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
114 } while_each_thread(g, t); 130 entry->args[i]);
131 if (!ret)
132 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
134 sizeof(unsigned long));
135 if (!ret)
136 return 0;
137 offset += sizeof(unsigned long);
138 }
115 139
116 read_unlock_irqrestore(&tasklist_lock, flags); 140 trace_seq_puts(s, "\nprint fmt: \"");
141 for (i = 0; i < entry->nb_args; i++) {
142 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
143 sizeof(unsigned long),
144 i == entry->nb_args - 1 ? "" : ", ");
145 if (!ret)
146 return 0;
147 }
148 trace_seq_putc(s, '"');
117 149
118unlock: 150 for (i = 0; i < entry->nb_args; i++) {
119 mutex_unlock(&syscall_trace_lock); 151 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
152 entry->args[i]);
153 if (!ret)
154 return 0;
155 }
156
157 return trace_seq_putc(s, '\n');
120} 158}
121 159
122void stop_ftrace_syscalls(void) 160int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
123{ 161{
124 unsigned long flags; 162 int ret;
125 struct task_struct *g, *t; 163 struct syscall_trace_exit trace;
126 164
127 mutex_lock(&syscall_trace_lock); 165 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
168 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(unsigned long, ret));
170 if (!ret)
171 return 0;
128 172
129 /* There are perhaps still some users */ 173 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
130 if (--refcount) 174}
131 goto unlock;
132 175
133 read_lock_irqsave(&tasklist_lock, flags); 176int syscall_enter_define_fields(struct ftrace_event_call *call)
177{
178 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta;
180 int ret;
181 int nr;
182 int i;
183 int offset = offsetof(typeof(trace), args);
184
185 nr = syscall_name_to_nr(call->data);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret)
193 return ret;
194
195 for (i = 0; i < meta->nb_args; i++) {
196 ret = trace_define_field(call, meta->types[i],
197 meta->args[i], offset,
198 sizeof(unsigned long), 0,
199 FILTER_OTHER);
200 offset += sizeof(unsigned long);
201 }
134 202
135 do_each_thread(g, t) { 203 return ret;
136 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 204}
137 } while_each_thread(g, t);
138 205
139 read_unlock_irqrestore(&tasklist_lock, flags); 206int syscall_exit_define_fields(struct ftrace_event_call *call)
207{
208 struct syscall_trace_exit trace;
209 int ret;
140 210
141unlock: 211 ret = trace_define_common_fields(call);
142 mutex_unlock(&syscall_trace_lock); 212 if (ret)
213 return ret;
214
215 ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
216 FILTER_OTHER);
217
218 return ret;
143} 219}
144 220
145void ftrace_syscall_enter(struct pt_regs *regs) 221void ftrace_syscall_enter(struct pt_regs *regs, long id)
146{ 222{
147 struct syscall_trace_enter *entry; 223 struct syscall_trace_enter *entry;
148 struct syscall_metadata *sys_data; 224 struct syscall_metadata *sys_data;
149 struct ring_buffer_event *event; 225 struct ring_buffer_event *event;
226 struct ring_buffer *buffer;
150 int size; 227 int size;
151 int syscall_nr; 228 int syscall_nr;
152 229
153 syscall_nr = syscall_get_nr(current, regs); 230 syscall_nr = syscall_get_nr(current, regs);
231 if (syscall_nr < 0)
232 return;
233 if (!test_bit(syscall_nr, enabled_enter_syscalls))
234 return;
154 235
155 sys_data = syscall_nr_to_meta(syscall_nr); 236 sys_data = syscall_nr_to_meta(syscall_nr);
156 if (!sys_data) 237 if (!sys_data)
@@ -158,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
158 239
159 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
160 241
161 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, 242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
162 0, 0); 243 size, 0, 0);
163 if (!event) 244 if (!event)
164 return; 245 return;
165 246
@@ -167,24 +248,30 @@ void ftrace_syscall_enter(struct pt_regs *regs)
167 entry->nr = syscall_nr; 248 entry->nr = syscall_nr;
168 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 249 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
169 250
170 trace_current_buffer_unlock_commit(event, 0, 0); 251 if (!filter_current_check_discard(buffer, sys_data->enter_event,
171 trace_wake_up(); 252 entry, event))
253 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
172} 254}
173 255
174void ftrace_syscall_exit(struct pt_regs *regs) 256void ftrace_syscall_exit(struct pt_regs *regs, long ret)
175{ 257{
176 struct syscall_trace_exit *entry; 258 struct syscall_trace_exit *entry;
177 struct syscall_metadata *sys_data; 259 struct syscall_metadata *sys_data;
178 struct ring_buffer_event *event; 260 struct ring_buffer_event *event;
261 struct ring_buffer *buffer;
179 int syscall_nr; 262 int syscall_nr;
180 263
181 syscall_nr = syscall_get_nr(current, regs); 264 syscall_nr = syscall_get_nr(current, regs);
265 if (syscall_nr < 0)
266 return;
267 if (!test_bit(syscall_nr, enabled_exit_syscalls))
268 return;
182 269
183 sys_data = syscall_nr_to_meta(syscall_nr); 270 sys_data = syscall_nr_to_meta(syscall_nr);
184 if (!sys_data) 271 if (!sys_data)
185 return; 272 return;
186 273
187 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, 274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
188 sizeof(*entry), 0, 0); 275 sizeof(*entry), 0, 0);
189 if (!event) 276 if (!event)
190 return; 277 return;
@@ -193,58 +280,244 @@ void ftrace_syscall_exit(struct pt_regs *regs)
193 entry->nr = syscall_nr; 280 entry->nr = syscall_nr;
194 entry->ret = syscall_get_return_value(current, regs); 281 entry->ret = syscall_get_return_value(current, regs);
195 282
196 trace_current_buffer_unlock_commit(event, 0, 0); 283 if (!filter_current_check_discard(buffer, sys_data->exit_event,
197 trace_wake_up(); 284 entry, event))
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
198} 286}
199 287
200static int init_syscall_tracer(struct trace_array *tr) 288int reg_event_syscall_enter(void *ptr)
201{ 289{
202 start_ftrace_syscalls(); 290 int ret = 0;
291 int num;
292 char *name;
293
294 name = (char *)ptr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++;
307 }
308 mutex_unlock(&syscall_trace_lock);
309 return ret;
310}
311
312void unreg_event_syscall_enter(void *ptr)
313{
314 int num;
315 char *name;
203 316
204 return 0; 317 name = (char *)ptr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls)
320 return;
321 mutex_lock(&syscall_trace_lock);
322 sys_refcount_enter--;
323 clear_bit(num, enabled_enter_syscalls);
324 if (!sys_refcount_enter)
325 unregister_trace_sys_enter(ftrace_syscall_enter);
326 mutex_unlock(&syscall_trace_lock);
205} 327}
206 328
207static void reset_syscall_tracer(struct trace_array *tr) 329int reg_event_syscall_exit(void *ptr)
208{ 330{
209 stop_ftrace_syscalls(); 331 int ret = 0;
210 tracing_reset_online_cpus(tr); 332 int num;
333 char *name;
334
335 name = (char *)ptr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++;
348 }
349 mutex_unlock(&syscall_trace_lock);
350 return ret;
211} 351}
212 352
213static struct trace_event syscall_enter_event = { 353void unreg_event_syscall_exit(void *ptr)
214 .type = TRACE_SYSCALL_ENTER, 354{
215 .trace = print_syscall_enter, 355 int num;
216}; 356 char *name;
357
358 name = (char *)ptr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls)
361 return;
362 mutex_lock(&syscall_trace_lock);
363 sys_refcount_exit--;
364 clear_bit(num, enabled_exit_syscalls);
365 if (!sys_refcount_exit)
366 unregister_trace_sys_exit(ftrace_syscall_exit);
367 mutex_unlock(&syscall_trace_lock);
368}
217 369
218static struct trace_event syscall_exit_event = { 370struct trace_event event_syscall_enter = {
219 .type = TRACE_SYSCALL_EXIT, 371 .trace = print_syscall_enter,
220 .trace = print_syscall_exit,
221}; 372};
222 373
223static struct tracer syscall_tracer __read_mostly = { 374struct trace_event event_syscall_exit = {
224 .name = "syscall", 375 .trace = print_syscall_exit,
225 .init = init_syscall_tracer,
226 .reset = reset_syscall_tracer,
227 .flags = &syscalls_flags,
228}; 376};
229 377
230__init int register_ftrace_syscalls(void) 378#ifdef CONFIG_EVENT_PROFILE
379
380static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
381static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
382static int sys_prof_refcount_enter;
383static int sys_prof_refcount_exit;
384
385static void prof_syscall_enter(struct pt_regs *regs, long id)
231{ 386{
232 int ret; 387 struct syscall_trace_enter *rec;
388 struct syscall_metadata *sys_data;
389 int syscall_nr;
390 int size;
233 391
234 ret = register_ftrace_event(&syscall_enter_event); 392 syscall_nr = syscall_get_nr(current, regs);
235 if (!ret) { 393 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
236 printk(KERN_WARNING "event %d failed to register\n", 394 return;
237 syscall_enter_event.type); 395
238 WARN_ON_ONCE(1); 396 sys_data = syscall_nr_to_meta(syscall_nr);
397 if (!sys_data)
398 return;
399
400 /* get the size after alignment with the u32 buffer size field */
401 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
402 size = ALIGN(size + sizeof(u32), sizeof(u64));
403 size -= sizeof(u32);
404
405 do {
406 char raw_data[size];
407
408 /* zero the dead bytes from align to not leak stack to user */
409 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
410
411 rec = (struct syscall_trace_enter *) raw_data;
412 tracing_generic_entry_update(&rec->ent, 0, 0);
413 rec->ent.type = sys_data->enter_id;
414 rec->nr = syscall_nr;
415 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
416 (unsigned long *)&rec->args);
417 perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
418 } while(0);
419}
420
421int reg_prof_syscall_enter(char *name)
422{
423 int ret = 0;
424 int num;
425
426 num = syscall_name_to_nr(name);
427 if (num < 0 || num >= NR_syscalls)
428 return -ENOSYS;
429
430 mutex_lock(&syscall_trace_lock);
431 if (!sys_prof_refcount_enter)
432 ret = register_trace_sys_enter(prof_syscall_enter);
433 if (ret) {
434 pr_info("event trace: Could not activate"
435 "syscall entry trace point");
436 } else {
437 set_bit(num, enabled_prof_enter_syscalls);
438 sys_prof_refcount_enter++;
239 } 439 }
440 mutex_unlock(&syscall_trace_lock);
441 return ret;
442}
240 443
241 ret = register_ftrace_event(&syscall_exit_event); 444void unreg_prof_syscall_enter(char *name)
242 if (!ret) { 445{
243 printk(KERN_WARNING "event %d failed to register\n", 446 int num;
244 syscall_exit_event.type); 447
245 WARN_ON_ONCE(1); 448 num = syscall_name_to_nr(name);
449 if (num < 0 || num >= NR_syscalls)
450 return;
451
452 mutex_lock(&syscall_trace_lock);
453 sys_prof_refcount_enter--;
454 clear_bit(num, enabled_prof_enter_syscalls);
455 if (!sys_prof_refcount_enter)
456 unregister_trace_sys_enter(prof_syscall_enter);
457 mutex_unlock(&syscall_trace_lock);
458}
459
460static void prof_syscall_exit(struct pt_regs *regs, long ret)
461{
462 struct syscall_metadata *sys_data;
463 struct syscall_trace_exit rec;
464 int syscall_nr;
465
466 syscall_nr = syscall_get_nr(current, regs);
467 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
468 return;
469
470 sys_data = syscall_nr_to_meta(syscall_nr);
471 if (!sys_data)
472 return;
473
474 tracing_generic_entry_update(&rec.ent, 0, 0);
475 rec.ent.type = sys_data->exit_id;
476 rec.nr = syscall_nr;
477 rec.ret = syscall_get_return_value(current, regs);
478
479 perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
480}
481
482int reg_prof_syscall_exit(char *name)
483{
484 int ret = 0;
485 int num;
486
487 num = syscall_name_to_nr(name);
488 if (num < 0 || num >= NR_syscalls)
489 return -ENOSYS;
490
491 mutex_lock(&syscall_trace_lock);
492 if (!sys_prof_refcount_exit)
493 ret = register_trace_sys_exit(prof_syscall_exit);
494 if (ret) {
495 pr_info("event trace: Could not activate"
496 "syscall entry trace point");
497 } else {
498 set_bit(num, enabled_prof_exit_syscalls);
499 sys_prof_refcount_exit++;
246 } 500 }
501 mutex_unlock(&syscall_trace_lock);
502 return ret;
503}
247 504
248 return register_tracer(&syscall_tracer); 505void unreg_prof_syscall_exit(char *name)
506{
507 int num;
508
509 num = syscall_name_to_nr(name);
510 if (num < 0 || num >= NR_syscalls)
511 return;
512
513 mutex_lock(&syscall_trace_lock);
514 sys_prof_refcount_exit--;
515 clear_bit(num, enabled_prof_exit_syscalls);
516 if (!sys_prof_refcount_exit)
517 unregister_trace_sys_exit(prof_syscall_exit);
518 mutex_unlock(&syscall_trace_lock);
249} 519}
250device_initcall(register_ftrace_syscalls); 520
521#endif
522
523
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 97fcea4acce1..40cafb07dffd 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/kref.h>
12#include "trace_stat.h" 13#include "trace_stat.h"
13#include "trace.h" 14#include "trace.h"
14 15
@@ -16,6 +17,7 @@
16/* A cpu workqueue thread */ 17/* A cpu workqueue thread */
17struct cpu_workqueue_stats { 18struct cpu_workqueue_stats {
18 struct list_head list; 19 struct list_head list;
20 struct kref kref;
19 int cpu; 21 int cpu;
20 pid_t pid; 22 pid_t pid;
21/* Can be inserted from interrupt or user context, need to be atomic */ 23/* Can be inserted from interrupt or user context, need to be atomic */
@@ -39,6 +41,11 @@ struct workqueue_global_stats {
39static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); 41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
40#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) 42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
41 43
44static void cpu_workqueue_stat_free(struct kref *kref)
45{
46 kfree(container_of(kref, struct cpu_workqueue_stats, kref));
47}
48
42/* Insertion of a work */ 49/* Insertion of a work */
43static void 50static void
44probe_workqueue_insertion(struct task_struct *wq_thread, 51probe_workqueue_insertion(struct task_struct *wq_thread,
@@ -96,8 +103,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
96 return; 103 return;
97 } 104 }
98 INIT_LIST_HEAD(&cws->list); 105 INIT_LIST_HEAD(&cws->list);
106 kref_init(&cws->kref);
99 cws->cpu = cpu; 107 cws->cpu = cpu;
100
101 cws->pid = wq_thread->pid; 108 cws->pid = wq_thread->pid;
102 109
103 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 110 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
@@ -118,7 +125,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
118 list) { 125 list) {
119 if (node->pid == wq_thread->pid) { 126 if (node->pid == wq_thread->pid) {
120 list_del(&node->list); 127 list_del(&node->list);
121 kfree(node); 128 kref_put(&node->kref, cpu_workqueue_stat_free);
122 goto found; 129 goto found;
123 } 130 }
124 } 131 }
@@ -137,9 +144,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
137 144
138 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 145 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
139 146
140 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) 147 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
141 ret = list_entry(workqueue_cpu_stat(cpu)->list.next, 148 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
142 struct cpu_workqueue_stats, list); 149 struct cpu_workqueue_stats, list);
150 kref_get(&ret->kref);
151 }
143 152
144 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 153 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
145 154
@@ -162,9 +171,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
162static void *workqueue_stat_next(void *prev, int idx) 171static void *workqueue_stat_next(void *prev, int idx)
163{ 172{
164 struct cpu_workqueue_stats *prev_cws = prev; 173 struct cpu_workqueue_stats *prev_cws = prev;
174 struct cpu_workqueue_stats *ret;
165 int cpu = prev_cws->cpu; 175 int cpu = prev_cws->cpu;
166 unsigned long flags; 176 unsigned long flags;
167 void *ret = NULL;
168 177
169 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 178 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
170 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { 179 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
@@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
175 return NULL; 184 return NULL;
176 } while (!(ret = workqueue_stat_start_cpu(cpu))); 185 } while (!(ret = workqueue_stat_start_cpu(cpu)));
177 return ret; 186 return ret;
187 } else {
188 ret = list_entry(prev_cws->list.next,
189 struct cpu_workqueue_stats, list);
190 kref_get(&ret->kref);
178 } 191 }
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 192 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 193
181 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, 194 return ret;
182 list);
183} 195}
184 196
185static int workqueue_stat_show(struct seq_file *s, void *p) 197static int workqueue_stat_show(struct seq_file *s, void *p)
@@ -203,6 +215,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
203 return 0; 215 return 0;
204} 216}
205 217
218static void workqueue_stat_release(void *stat)
219{
220 struct cpu_workqueue_stats *node = stat;
221
222 kref_put(&node->kref, cpu_workqueue_stat_free);
223}
224
206static int workqueue_stat_headers(struct seq_file *s) 225static int workqueue_stat_headers(struct seq_file *s)
207{ 226{
208 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 227 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
@@ -215,6 +234,7 @@ struct tracer_stat workqueue_stats __read_mostly = {
215 .stat_start = workqueue_stat_start, 234 .stat_start = workqueue_stat_start,
216 .stat_next = workqueue_stat_next, 235 .stat_next = workqueue_stat_next,
217 .stat_show = workqueue_stat_show, 236 .stat_show = workqueue_stat_show,
237 .stat_release = workqueue_stat_release,
218 .stat_headers = workqueue_stat_headers 238 .stat_headers = workqueue_stat_headers
219}; 239};
220 240