Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 |   43
-rw-r--r--  kernel/trace/Makefile                |    2
-rw-r--r--  kernel/trace/blktrace.c              |   63
-rw-r--r--  kernel/trace/ftrace.c                |  323
-rw-r--r--  kernel/trace/kmemtrace.c             |  149
-rw-r--r--  kernel/trace/power-traces.c          |   20
-rw-r--r--  kernel/trace/ring_buffer.c           | 1140
-rw-r--r--  kernel/trace/trace.c                 |  875
-rw-r--r--  kernel/trace/trace.h                 |  359
-rw-r--r--  kernel/trace/trace_boot.c            |   20
-rw-r--r--  kernel/trace/trace_clock.c           |   24
-rw-r--r--  kernel/trace/trace_entries.h         |  366
-rw-r--r--  kernel/trace/trace_event_profile.c   |   87
-rw-r--r--  kernel/trace/trace_event_types.h     |  178
-rw-r--r--  kernel/trace/trace_events.c          |  287
-rw-r--r--  kernel/trace/trace_events_filter.c   |  298
-rw-r--r--  kernel/trace/trace_export.c          |  290
-rw-r--r--  kernel/trace/trace_functions.c       |    4
-rw-r--r--  kernel/trace/trace_functions_graph.c |  239
-rw-r--r--  kernel/trace/trace_hw_branches.c     |    2
-rw-r--r--  kernel/trace/trace_irqsoff.c         |   19
-rw-r--r--  kernel/trace/trace_mmiotrace.c       |   16
-rw-r--r--  kernel/trace/trace_output.c          |   42
-rw-r--r--  kernel/trace/trace_output.h          |    2
-rw-r--r--  kernel/trace/trace_power.c           |  214
-rw-r--r--  kernel/trace/trace_printk.c          |    3
-rw-r--r--  kernel/trace/trace_sched_switch.c    |   59
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    |   59
-rw-r--r--  kernel/trace/trace_selftest.c        |    1
-rw-r--r--  kernel/trace/trace_stack.c           |   54
-rw-r--r--  kernel/trace/trace_stat.c            |   51
-rw-r--r--  kernel/trace/trace_stat.h            |    2
-rw-r--r--  kernel/trace/trace_syscalls.c        |  530
-rw-r--r--  kernel/trace/trace_workqueue.c       |   32
34 files changed, 3673 insertions(+), 2180 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 019f380fd764..b416512ad17f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -11,12 +11,18 @@ config NOP_TRACER
11 11
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help
15 See Documentation/trace/ftrace-implementation.txt
14 16
15config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
16 bool 18 bool
19 help
20 See Documentation/trace/ftrace-implementation.txt
17 21
18config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
19 bool 23 bool
24 help
25 See Documentation/trace/ftrace-implementation.txt
20 26
21config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
22 bool 28 bool
@@ -28,21 +34,25 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
28config HAVE_FUNCTION_TRACE_MCOUNT_TEST 34config HAVE_FUNCTION_TRACE_MCOUNT_TEST
29 bool 35 bool
30 help 36 help
31 This gets selected when the arch tests the function_trace_stop 37 See Documentation/trace/ftrace-implementation.txt
32 variable at the mcount call site. Otherwise, this variable
33 is tested by the called function.
34 38
35config HAVE_DYNAMIC_FTRACE 39config HAVE_DYNAMIC_FTRACE
36 bool 40 bool
41 help
42 See Documentation/trace/ftrace-implementation.txt
37 43
38config HAVE_FTRACE_MCOUNT_RECORD 44config HAVE_FTRACE_MCOUNT_RECORD
39 bool 45 bool
46 help
47 See Documentation/trace/ftrace-implementation.txt
40 48
41config HAVE_HW_BRANCH_TRACER 49config HAVE_HW_BRANCH_TRACER
42 bool 50 bool
43 51
44config HAVE_FTRACE_SYSCALLS 52config HAVE_SYSCALL_TRACEPOINTS
45 bool 53 bool
54 help
55 See Documentation/trace/ftrace-implementation.txt
46 56
47config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
48 bool 58 bool
@@ -60,15 +70,20 @@ config EVENT_TRACING
60 bool 70 bool
61 71
62config CONTEXT_SWITCH_TRACER 72config CONTEXT_SWITCH_TRACER
63 select MARKERS
64 bool 73 bool
65 74
75config RING_BUFFER_ALLOW_SWAP
76 bool
77 help
78 Allow the use of ring_buffer_swap_cpu.
79 Adds a very slight overhead to tracing when enabled.
80
66# All tracer options should select GENERIC_TRACER. For those options that are 81# All tracer options should select GENERIC_TRACER. For those options that are
67# enabled by all tracers (context switch and event tracer) they select TRACING. 82# enabled by all tracers (context switch and event tracer) they select TRACING.
68# This allows those options to appear when no other tracer is selected. But the 83# This allows those options to appear when no other tracer is selected. But the
69# options do not appear when something else selects it. We need the two options 84# options do not appear when something else selects it. We need the two options
70# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
71# hidding of the automatic options options. 86# hidding of the automatic options.
72 87
73config TRACING 88config TRACING
74 bool 89 bool
@@ -147,6 +162,7 @@ config IRQSOFF_TRACER
147 select TRACE_IRQFLAGS 162 select TRACE_IRQFLAGS
148 select GENERIC_TRACER 163 select GENERIC_TRACER
149 select TRACER_MAX_TRACE 164 select TRACER_MAX_TRACE
165 select RING_BUFFER_ALLOW_SWAP
150 help 166 help
151 This option measures the time spent in irqs-off critical 167 This option measures the time spent in irqs-off critical
152 sections, with microsecond accuracy. 168 sections, with microsecond accuracy.
@@ -168,6 +184,7 @@ config PREEMPT_TRACER
168 depends on PREEMPT 184 depends on PREEMPT
169 select GENERIC_TRACER 185 select GENERIC_TRACER
170 select TRACER_MAX_TRACE 186 select TRACER_MAX_TRACE
187 select RING_BUFFER_ALLOW_SWAP
171 help 188 help
172 This option measures the time spent in preemption off critical 189 This option measures the time spent in preemption off critical
173 sections, with microsecond accuracy. 190 sections, with microsecond accuracy.
@@ -211,7 +228,7 @@ config ENABLE_DEFAULT_TRACERS
211 228
212config FTRACE_SYSCALLS 229config FTRACE_SYSCALLS
213 bool "Trace syscalls" 230 bool "Trace syscalls"
214 depends on HAVE_FTRACE_SYSCALLS 231 depends on HAVE_SYSCALL_TRACEPOINTS
215 select GENERIC_TRACER 232 select GENERIC_TRACER
216 select KALLSYMS 233 select KALLSYMS
217 help 234 help
@@ -462,6 +479,18 @@ config FTRACE_STARTUP_TEST
462 functioning properly. It will do tests on all the configured 479 functioning properly. It will do tests on all the configured
463 tracers of ftrace. 480 tracers of ftrace.
464 481
482config EVENT_TRACE_TEST_SYSCALLS
483 bool "Run selftest on syscall events"
484 depends on FTRACE_STARTUP_TEST
485 help
486 This option will also enable testing every syscall event.
487 It only enables the event and disables it and runs various loads
488 with the event enabled. This adds a bit more time for kernel boot
489 up since it runs this on every system call defined.
490
491 TBD - enable a way to actually call the syscalls as we test their
492 events
493
465config MMIOTRACE 494config MMIOTRACE
466 bool "Memory mapped IO tracing" 495 bool "Memory mapped IO tracing"
467 depends on HAVE_MMIOTRACE_SUPPORT && PCI 496 depends on HAVE_MMIOTRACE_SUPPORT && PCI
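The new RING_BUFFER_ALLOW_SWAP option above only gates ring_buffer_swap_cpu(), which the irqsoff and preemptoff tracers select. As a rough sketch of why they need it (the helper name and signature come from this kernel's tracing core, not from this patch; treat it as an assumption, not the actual implementation), a latency tracer snapshots the hot per-CPU buffer by swapping it with a spare "max" buffer:

/*
 * Illustrative sketch only: preserve a max-latency snapshot by swapping
 * one CPU's pages with a spare buffer, as allowed by RING_BUFFER_ALLOW_SWAP.
 * Assumes ring_buffer_swap_cpu(buffer_a, buffer_b, cpu) as in this tree;
 * example_update_max() and the max_tr spare array are stand-ins.
 */
static void example_update_max(struct trace_array *tr, int cpu)
{
	int ret;

	/* swap the live CPU buffer with the spare max_tr buffer */
	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);

	/* a busy writer can make the swap fail; that is not a bug */
	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
}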
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 844164dca90a..26f03ac07c2b 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_POWER_TRACER) += trace_power.o
46obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
47obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
48obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
@@ -54,5 +53,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 57
58libftrace-y := ftrace.o 58libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1090b0aed9ba..d9d6206e0b14 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -65,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
65{ 65{
66 struct blk_io_trace *t; 66 struct blk_io_trace *t;
67 struct ring_buffer_event *event = NULL; 67 struct ring_buffer_event *event = NULL;
68 struct ring_buffer *buffer = NULL;
68 int pc = 0; 69 int pc = 0;
69 int cpu = smp_processor_id(); 70 int cpu = smp_processor_id();
70 bool blk_tracer = blk_tracer_enabled; 71 bool blk_tracer = blk_tracer_enabled;
71 72
72 if (blk_tracer) { 73 if (blk_tracer) {
74 buffer = blk_tr->buffer;
73 pc = preempt_count(); 75 pc = preempt_count();
74 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 76 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
75 sizeof(*t) + len, 77 sizeof(*t) + len,
76 0, pc); 78 0, pc);
77 if (!event) 79 if (!event)
@@ -96,7 +98,7 @@ record_it:
96 memcpy((void *) t + sizeof(*t), data, len); 98 memcpy((void *) t + sizeof(*t), data, len);
97 99
98 if (blk_tracer) 100 if (blk_tracer)
99 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 101 trace_buffer_unlock_commit(buffer, event, 0, pc);
100 } 102 }
101} 103}
102 104
@@ -179,6 +181,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
179{ 181{
180 struct task_struct *tsk = current; 182 struct task_struct *tsk = current;
181 struct ring_buffer_event *event = NULL; 183 struct ring_buffer_event *event = NULL;
184 struct ring_buffer *buffer = NULL;
182 struct blk_io_trace *t; 185 struct blk_io_trace *t;
183 unsigned long flags = 0; 186 unsigned long flags = 0;
184 unsigned long *sequence; 187 unsigned long *sequence;
@@ -204,8 +207,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
204 if (blk_tracer) { 207 if (blk_tracer) {
205 tracing_record_cmdline(current); 208 tracing_record_cmdline(current);
206 209
210 buffer = blk_tr->buffer;
207 pc = preempt_count(); 211 pc = preempt_count();
208 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 212 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
209 sizeof(*t) + pdu_len, 213 sizeof(*t) + pdu_len,
210 0, pc); 214 0, pc);
211 if (!event) 215 if (!event)
@@ -252,7 +256,7 @@ record_it:
252 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 256 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
253 257
254 if (blk_tracer) { 258 if (blk_tracer) {
255 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 259 trace_buffer_unlock_commit(buffer, event, 0, pc);
256 return; 260 return;
257 } 261 }
258 } 262 }
@@ -267,8 +271,8 @@ static void blk_trace_free(struct blk_trace *bt)
267{ 271{
268 debugfs_remove(bt->msg_file); 272 debugfs_remove(bt->msg_file);
269 debugfs_remove(bt->dropped_file); 273 debugfs_remove(bt->dropped_file);
270 debugfs_remove(bt->dir);
271 relay_close(bt->rchan); 274 relay_close(bt->rchan);
275 debugfs_remove(bt->dir);
272 free_percpu(bt->sequence); 276 free_percpu(bt->sequence);
273 free_percpu(bt->msg_data); 277 free_percpu(bt->msg_data);
274 kfree(bt); 278 kfree(bt);
@@ -378,18 +382,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
378 382
379static int blk_remove_buf_file_callback(struct dentry *dentry) 383static int blk_remove_buf_file_callback(struct dentry *dentry)
380{ 384{
381 struct dentry *parent = dentry->d_parent;
382 debugfs_remove(dentry); 385 debugfs_remove(dentry);
383 386
384 /*
385 * this will fail for all but the last file, but that is ok. what we
386 * care about is the top level buts->name directory going away, when
387 * the last trace file is gone. Then we don't have to rmdir() that
388 * manually on trace stop, so it nicely solves the issue with
389 * force killing of running traces.
390 */
391
392 debugfs_remove(parent);
393 return 0; 387 return 0;
394} 388}
395 389
@@ -862,6 +856,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
862} 856}
863 857
864/** 858/**
859 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
860 * @q: queue the io is for
861 * @rq: the source request
862 * @dev: target device
863 * @from: source sector
864 *
865 * Description:
866 * Device mapper remaps request to other devices.
867 * Add a trace for that action.
868 *
869 **/
870static void blk_add_trace_rq_remap(struct request_queue *q,
871 struct request *rq, dev_t dev,
872 sector_t from)
873{
874 struct blk_trace *bt = q->blk_trace;
875 struct blk_io_trace_remap r;
876
877 if (likely(!bt))
878 return;
879
880 r.device_from = cpu_to_be32(dev);
881 r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
882 r.sector_from = cpu_to_be64(from);
883
884 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
885 rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
886 sizeof(r), &r);
887}
888
889/**
865 * blk_add_driver_data - Add binary message with driver-specific data 890 * blk_add_driver_data - Add binary message with driver-specific data
866 * @q: queue the io is for 891 * @q: queue the io is for
867 * @rq: io request 892 * @rq: io request
@@ -928,10 +953,13 @@ static void blk_register_tracepoints(void)
928 WARN_ON(ret); 953 WARN_ON(ret);
929 ret = register_trace_block_remap(blk_add_trace_remap); 954 ret = register_trace_block_remap(blk_add_trace_remap);
930 WARN_ON(ret); 955 WARN_ON(ret);
956 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap);
957 WARN_ON(ret);
931} 958}
932 959
933static void blk_unregister_tracepoints(void) 960static void blk_unregister_tracepoints(void)
934{ 961{
962 unregister_trace_block_rq_remap(blk_add_trace_rq_remap);
935 unregister_trace_block_remap(blk_add_trace_remap); 963 unregister_trace_block_remap(blk_add_trace_remap);
936 unregister_trace_block_split(blk_add_trace_split); 964 unregister_trace_block_split(blk_add_trace_split);
937 unregister_trace_block_unplug_io(blk_add_trace_unplug_io); 965 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
@@ -1663,6 +1691,11 @@ int blk_trace_init_sysfs(struct device *dev)
1663 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); 1691 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
1664} 1692}
1665 1693
1694void blk_trace_remove_sysfs(struct device *dev)
1695{
1696 sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
1697}
1698
1666#endif /* CONFIG_BLK_DEV_IO_TRACE */ 1699#endif /* CONFIG_BLK_DEV_IO_TRACE */
1667 1700
1668#ifdef CONFIG_EVENT_TRACING 1701#ifdef CONFIG_EVENT_TRACING
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4521c77d1a1a..46592feab5a6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1016,71 +1016,35 @@ static int
1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1017{ 1017{
1018 unsigned long ftrace_addr; 1018 unsigned long ftrace_addr;
1019 unsigned long ip, fl; 1019 unsigned long flag = 0UL;
1020 1020
1021 ftrace_addr = (unsigned long)FTRACE_ADDR; 1021 ftrace_addr = (unsigned long)FTRACE_ADDR;
1022 1022
1023 ip = rec->ip;
1024
1025 /* 1023 /*
1026 * If this record is not to be traced and 1024 * If this record is not to be traced or we want to disable it,
1027 * it is not enabled then do nothing. 1025 * then disable it.
1028 * 1026 *
1029 * If this record is not to be traced and 1027 * If we want to enable it and filtering is off, then enable it.
1030 * it is enabled then disable it.
1031 * 1028 *
1029 * If we want to enable it and filtering is on, enable it only if
1030 * it's filtered
1032 */ 1031 */
1033 if (rec->flags & FTRACE_FL_NOTRACE) { 1032 if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) {
1034 if (rec->flags & FTRACE_FL_ENABLED) 1033 if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER))
1035 rec->flags &= ~FTRACE_FL_ENABLED; 1034 flag = FTRACE_FL_ENABLED;
1036 else 1035 }
1037 return 0;
1038
1039 } else if (ftrace_filtered && enable) {
1040 /*
1041 * Filtering is on:
1042 */
1043
1044 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
1045
1046 /* Record is filtered and enabled, do nothing */
1047 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
1048 return 0;
1049
1050 /* Record is not filtered or enabled, do nothing */
1051 if (!fl)
1052 return 0;
1053
1054 /* Record is not filtered but enabled, disable it */
1055 if (fl == FTRACE_FL_ENABLED)
1056 rec->flags &= ~FTRACE_FL_ENABLED;
1057 else
1058 /* Otherwise record is filtered but not enabled, enable it */
1059 rec->flags |= FTRACE_FL_ENABLED;
1060 } else {
1061 /* Disable or not filtered */
1062
1063 if (enable) {
1064 /* if record is enabled, do nothing */
1065 if (rec->flags & FTRACE_FL_ENABLED)
1066 return 0;
1067
1068 rec->flags |= FTRACE_FL_ENABLED;
1069
1070 } else {
1071 1036
1072 /* if record is not enabled, do nothing */ 1037 /* If the state of this record hasn't changed, then do nothing */
1073 if (!(rec->flags & FTRACE_FL_ENABLED)) 1038 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1074 return 0; 1039 return 0;
1075 1040
1076 rec->flags &= ~FTRACE_FL_ENABLED; 1041 if (flag) {
1077 } 1042 rec->flags |= FTRACE_FL_ENABLED;
1043 return ftrace_make_call(rec, ftrace_addr);
1078 } 1044 }
1079 1045
1080 if (rec->flags & FTRACE_FL_ENABLED) 1046 rec->flags &= ~FTRACE_FL_ENABLED;
1081 return ftrace_make_call(rec, ftrace_addr); 1047 return ftrace_make_nop(NULL, rec, ftrace_addr);
1082 else
1083 return ftrace_make_nop(NULL, rec, ftrace_addr);
1084} 1048}
1085 1049
1086static void ftrace_replace_code(int enable) 1050static void ftrace_replace_code(int enable)
@@ -1359,11 +1323,10 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
1359 1323
1360enum { 1324enum {
1361 FTRACE_ITER_FILTER = (1 << 0), 1325 FTRACE_ITER_FILTER = (1 << 0),
1362 FTRACE_ITER_CONT = (1 << 1), 1326 FTRACE_ITER_NOTRACE = (1 << 1),
1363 FTRACE_ITER_NOTRACE = (1 << 2), 1327 FTRACE_ITER_FAILURES = (1 << 2),
1364 FTRACE_ITER_FAILURES = (1 << 3), 1328 FTRACE_ITER_PRINTALL = (1 << 3),
1365 FTRACE_ITER_PRINTALL = (1 << 4), 1329 FTRACE_ITER_HASH = (1 << 4),
1366 FTRACE_ITER_HASH = (1 << 5),
1367}; 1330};
1368 1331
1369#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1332#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
@@ -1373,9 +1336,7 @@ struct ftrace_iterator {
1373 int hidx; 1336 int hidx;
1374 int idx; 1337 int idx;
1375 unsigned flags; 1338 unsigned flags;
1376 unsigned char buffer[FTRACE_BUFF_MAX+1]; 1339 struct trace_parser parser;
1377 unsigned buffer_idx;
1378 unsigned filtered;
1379}; 1340};
1380 1341
1381static void * 1342static void *
@@ -1438,18 +1399,13 @@ static int t_hash_show(struct seq_file *m, void *v)
1438{ 1399{
1439 struct ftrace_func_probe *rec; 1400 struct ftrace_func_probe *rec;
1440 struct hlist_node *hnd = v; 1401 struct hlist_node *hnd = v;
1441 char str[KSYM_SYMBOL_LEN];
1442 1402
1443 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1403 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
1444 1404
1445 if (rec->ops->print) 1405 if (rec->ops->print)
1446 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1406 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
1447 1407
1448 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1408 seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func);
1449 seq_printf(m, "%s:", str);
1450
1451 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
1452 seq_printf(m, "%s", str);
1453 1409
1454 if (rec->data) 1410 if (rec->data)
1455 seq_printf(m, ":%p", rec->data); 1411 seq_printf(m, ":%p", rec->data);
@@ -1547,7 +1503,6 @@ static int t_show(struct seq_file *m, void *v)
1547{ 1503{
1548 struct ftrace_iterator *iter = m->private; 1504 struct ftrace_iterator *iter = m->private;
1549 struct dyn_ftrace *rec = v; 1505 struct dyn_ftrace *rec = v;
1550 char str[KSYM_SYMBOL_LEN];
1551 1506
1552 if (iter->flags & FTRACE_ITER_HASH) 1507 if (iter->flags & FTRACE_ITER_HASH)
1553 return t_hash_show(m, v); 1508 return t_hash_show(m, v);
@@ -1560,14 +1515,12 @@ static int t_show(struct seq_file *m, void *v)
1560 if (!rec) 1515 if (!rec)
1561 return 0; 1516 return 0;
1562 1517
1563 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1518 seq_printf(m, "%ps\n", (void *)rec->ip);
1564
1565 seq_printf(m, "%s\n", str);
1566 1519
1567 return 0; 1520 return 0;
1568} 1521}
1569 1522
1570static struct seq_operations show_ftrace_seq_ops = { 1523static const struct seq_operations show_ftrace_seq_ops = {
1571 .start = t_start, 1524 .start = t_start,
1572 .next = t_next, 1525 .next = t_next,
1573 .stop = t_stop, 1526 .stop = t_stop,
@@ -1601,17 +1554,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
1601 return ret; 1554 return ret;
1602} 1555}
1603 1556
1604int ftrace_avail_release(struct inode *inode, struct file *file)
1605{
1606 struct seq_file *m = (struct seq_file *)file->private_data;
1607 struct ftrace_iterator *iter = m->private;
1608
1609 seq_release(inode, file);
1610 kfree(iter);
1611
1612 return 0;
1613}
1614
1615static int 1557static int
1616ftrace_failures_open(struct inode *inode, struct file *file) 1558ftrace_failures_open(struct inode *inode, struct file *file)
1617{ 1559{
@@ -1660,9 +1602,14 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1660 if (!iter) 1602 if (!iter)
1661 return -ENOMEM; 1603 return -ENOMEM;
1662 1604
1605 if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) {
1606 kfree(iter);
1607 return -ENOMEM;
1608 }
1609
1663 mutex_lock(&ftrace_regex_lock); 1610 mutex_lock(&ftrace_regex_lock);
1664 if ((file->f_mode & FMODE_WRITE) && 1611 if ((file->f_mode & FMODE_WRITE) &&
1665 !(file->f_flags & O_APPEND)) 1612 (file->f_flags & O_TRUNC))
1666 ftrace_filter_reset(enable); 1613 ftrace_filter_reset(enable);
1667 1614
1668 if (file->f_mode & FMODE_READ) { 1615 if (file->f_mode & FMODE_READ) {
@@ -1674,8 +1621,10 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1674 if (!ret) { 1621 if (!ret) {
1675 struct seq_file *m = file->private_data; 1622 struct seq_file *m = file->private_data;
1676 m->private = iter; 1623 m->private = iter;
1677 } else 1624 } else {
1625 trace_parser_put(&iter->parser);
1678 kfree(iter); 1626 kfree(iter);
1627 }
1679 } else 1628 } else
1680 file->private_data = iter; 1629 file->private_data = iter;
1681 mutex_unlock(&ftrace_regex_lock); 1630 mutex_unlock(&ftrace_regex_lock);
@@ -2115,9 +2064,9 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2115 int i, len = 0; 2064 int i, len = 0;
2116 char *search; 2065 char *search;
2117 2066
2118 if (glob && (strcmp(glob, "*") || !strlen(glob))) 2067 if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
2119 glob = NULL; 2068 glob = NULL;
2120 else { 2069 else if (glob) {
2121 int not; 2070 int not;
2122 2071
2123 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2072 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
@@ -2252,11 +2201,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2252 size_t cnt, loff_t *ppos, int enable) 2201 size_t cnt, loff_t *ppos, int enable)
2253{ 2202{
2254 struct ftrace_iterator *iter; 2203 struct ftrace_iterator *iter;
2255 char ch; 2204 struct trace_parser *parser;
2256 size_t read = 0; 2205 ssize_t ret, read;
2257 ssize_t ret;
2258 2206
2259 if (!cnt || cnt < 0) 2207 if (!cnt)
2260 return 0; 2208 return 0;
2261 2209
2262 mutex_lock(&ftrace_regex_lock); 2210 mutex_lock(&ftrace_regex_lock);
@@ -2267,68 +2215,23 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2267 } else 2215 } else
2268 iter = file->private_data; 2216 iter = file->private_data;
2269 2217
2270 if (!*ppos) { 2218 parser = &iter->parser;
2271 iter->flags &= ~FTRACE_ITER_CONT; 2219 read = trace_get_user(parser, ubuf, cnt, ppos);
2272 iter->buffer_idx = 0;
2273 }
2274
2275 ret = get_user(ch, ubuf++);
2276 if (ret)
2277 goto out;
2278 read++;
2279 cnt--;
2280
2281 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
2282 /* skip white space */
2283 while (cnt && isspace(ch)) {
2284 ret = get_user(ch, ubuf++);
2285 if (ret)
2286 goto out;
2287 read++;
2288 cnt--;
2289 }
2290
2291 if (isspace(ch)) {
2292 file->f_pos += read;
2293 ret = read;
2294 goto out;
2295 }
2296
2297 iter->buffer_idx = 0;
2298 }
2299 2220
2300 while (cnt && !isspace(ch)) { 2221 if (read >= 0 && trace_parser_loaded(parser) &&
2301 if (iter->buffer_idx < FTRACE_BUFF_MAX) 2222 !trace_parser_cont(parser)) {
2302 iter->buffer[iter->buffer_idx++] = ch; 2223 ret = ftrace_process_regex(parser->buffer,
2303 else { 2224 parser->idx, enable);
2304 ret = -EINVAL;
2305 goto out;
2306 }
2307 ret = get_user(ch, ubuf++);
2308 if (ret) 2225 if (ret)
2309 goto out; 2226 goto out;
2310 read++;
2311 cnt--;
2312 }
2313
2314 if (isspace(ch)) {
2315 iter->filtered++;
2316 iter->buffer[iter->buffer_idx] = 0;
2317 ret = ftrace_process_regex(iter->buffer,
2318 iter->buffer_idx, enable);
2319 if (ret)
2320 goto out;
2321 iter->buffer_idx = 0;
2322 } else
2323 iter->flags |= FTRACE_ITER_CONT;
2324 2227
2325 2228 trace_parser_clear(parser);
2326 file->f_pos += read; 2229 }
2327 2230
2328 ret = read; 2231 ret = read;
2329 out:
2330 mutex_unlock(&ftrace_regex_lock);
2331 2232
2233 mutex_unlock(&ftrace_regex_lock);
2234out:
2332 return ret; 2235 return ret;
2333} 2236}
2334 2237
@@ -2433,6 +2336,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2433{ 2336{
2434 struct seq_file *m = (struct seq_file *)file->private_data; 2337 struct seq_file *m = (struct seq_file *)file->private_data;
2435 struct ftrace_iterator *iter; 2338 struct ftrace_iterator *iter;
2339 struct trace_parser *parser;
2436 2340
2437 mutex_lock(&ftrace_regex_lock); 2341 mutex_lock(&ftrace_regex_lock);
2438 if (file->f_mode & FMODE_READ) { 2342 if (file->f_mode & FMODE_READ) {
@@ -2442,10 +2346,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2442 } else 2346 } else
2443 iter = file->private_data; 2347 iter = file->private_data;
2444 2348
2445 if (iter->buffer_idx) { 2349 parser = &iter->parser;
2446 iter->filtered++; 2350 if (trace_parser_loaded(parser)) {
2447 iter->buffer[iter->buffer_idx] = 0; 2351 parser->buffer[parser->idx] = 0;
2448 ftrace_match_records(iter->buffer, iter->buffer_idx, enable); 2352 ftrace_match_records(parser->buffer, parser->idx, enable);
2449 } 2353 }
2450 2354
2451 mutex_lock(&ftrace_lock); 2355 mutex_lock(&ftrace_lock);
@@ -2453,7 +2357,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2453 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 2357 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
2454 mutex_unlock(&ftrace_lock); 2358 mutex_unlock(&ftrace_lock);
2455 2359
2360 trace_parser_put(parser);
2456 kfree(iter); 2361 kfree(iter);
2362
2457 mutex_unlock(&ftrace_regex_lock); 2363 mutex_unlock(&ftrace_regex_lock);
2458 return 0; 2364 return 0;
2459} 2365}
@@ -2474,14 +2380,14 @@ static const struct file_operations ftrace_avail_fops = {
2474 .open = ftrace_avail_open, 2380 .open = ftrace_avail_open,
2475 .read = seq_read, 2381 .read = seq_read,
2476 .llseek = seq_lseek, 2382 .llseek = seq_lseek,
2477 .release = ftrace_avail_release, 2383 .release = seq_release_private,
2478}; 2384};
2479 2385
2480static const struct file_operations ftrace_failures_fops = { 2386static const struct file_operations ftrace_failures_fops = {
2481 .open = ftrace_failures_open, 2387 .open = ftrace_failures_open,
2482 .read = seq_read, 2388 .read = seq_read,
2483 .llseek = seq_lseek, 2389 .llseek = seq_lseek,
2484 .release = ftrace_avail_release, 2390 .release = seq_release_private,
2485}; 2391};
2486 2392
2487static const struct file_operations ftrace_filter_fops = { 2393static const struct file_operations ftrace_filter_fops = {
@@ -2510,11 +2416,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2510static void * 2416static void *
2511__g_next(struct seq_file *m, loff_t *pos) 2417__g_next(struct seq_file *m, loff_t *pos)
2512{ 2418{
2513 unsigned long *array = m->private;
2514
2515 if (*pos >= ftrace_graph_count) 2419 if (*pos >= ftrace_graph_count)
2516 return NULL; 2420 return NULL;
2517 return &array[*pos]; 2421 return &ftrace_graph_funcs[*pos];
2518} 2422}
2519 2423
2520static void * 2424static void *
@@ -2543,7 +2447,6 @@ static void g_stop(struct seq_file *m, void *p)
2543static int g_show(struct seq_file *m, void *v) 2447static int g_show(struct seq_file *m, void *v)
2544{ 2448{
2545 unsigned long *ptr = v; 2449 unsigned long *ptr = v;
2546 char str[KSYM_SYMBOL_LEN];
2547 2450
2548 if (!ptr) 2451 if (!ptr)
2549 return 0; 2452 return 0;
@@ -2553,14 +2456,12 @@ static int g_show(struct seq_file *m, void *v)
2553 return 0; 2456 return 0;
2554 } 2457 }
2555 2458
2556 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 2459 seq_printf(m, "%ps\n", (void *)*ptr);
2557
2558 seq_printf(m, "%s\n", str);
2559 2460
2560 return 0; 2461 return 0;
2561} 2462}
2562 2463
2563static struct seq_operations ftrace_graph_seq_ops = { 2464static const struct seq_operations ftrace_graph_seq_ops = {
2564 .start = g_start, 2465 .start = g_start,
2565 .next = g_next, 2466 .next = g_next,
2566 .stop = g_stop, 2467 .stop = g_stop,
@@ -2577,25 +2478,27 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2577 2478
2578 mutex_lock(&graph_lock); 2479 mutex_lock(&graph_lock);
2579 if ((file->f_mode & FMODE_WRITE) && 2480 if ((file->f_mode & FMODE_WRITE) &&
2580 !(file->f_flags & O_APPEND)) { 2481 (file->f_flags & O_TRUNC)) {
2581 ftrace_graph_count = 0; 2482 ftrace_graph_count = 0;
2582 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2483 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2583 } 2484 }
2485 mutex_unlock(&graph_lock);
2584 2486
2585 if (file->f_mode & FMODE_READ) { 2487 if (file->f_mode & FMODE_READ)
2586 ret = seq_open(file, &ftrace_graph_seq_ops); 2488 ret = seq_open(file, &ftrace_graph_seq_ops);
2587 if (!ret) {
2588 struct seq_file *m = file->private_data;
2589 m->private = ftrace_graph_funcs;
2590 }
2591 } else
2592 file->private_data = ftrace_graph_funcs;
2593 mutex_unlock(&graph_lock);
2594 2489
2595 return ret; 2490 return ret;
2596} 2491}
2597 2492
2598static int 2493static int
2494ftrace_graph_release(struct inode *inode, struct file *file)
2495{
2496 if (file->f_mode & FMODE_READ)
2497 seq_release(inode, file);
2498 return 0;
2499}
2500
2501static int
2599ftrace_set_func(unsigned long *array, int *idx, char *buffer) 2502ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2600{ 2503{
2601 struct dyn_ftrace *rec; 2504 struct dyn_ftrace *rec;
@@ -2650,12 +2553,8 @@ static ssize_t
2650ftrace_graph_write(struct file *file, const char __user *ubuf, 2553ftrace_graph_write(struct file *file, const char __user *ubuf,
2651 size_t cnt, loff_t *ppos) 2554 size_t cnt, loff_t *ppos)
2652{ 2555{
2653 unsigned char buffer[FTRACE_BUFF_MAX+1]; 2556 struct trace_parser parser;
2654 unsigned long *array; 2557 ssize_t read, ret;
2655 size_t read = 0;
2656 ssize_t ret;
2657 int index = 0;
2658 char ch;
2659 2558
2660 if (!cnt || cnt < 0) 2559 if (!cnt || cnt < 0)
2661 return 0; 2560 return 0;
@@ -2664,69 +2563,41 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2664 2563
2665 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { 2564 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2666 ret = -EBUSY; 2565 ret = -EBUSY;
2667 goto out; 2566 goto out_unlock;
2668 } 2567 }
2669 2568
2670 if (file->f_mode & FMODE_READ) { 2569 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2671 struct seq_file *m = file->private_data; 2570 ret = -ENOMEM;
2672 array = m->private; 2571 goto out_unlock;
2673 } else
2674 array = file->private_data;
2675
2676 ret = get_user(ch, ubuf++);
2677 if (ret)
2678 goto out;
2679 read++;
2680 cnt--;
2681
2682 /* skip white space */
2683 while (cnt && isspace(ch)) {
2684 ret = get_user(ch, ubuf++);
2685 if (ret)
2686 goto out;
2687 read++;
2688 cnt--;
2689 } 2572 }
2690 2573
2691 if (isspace(ch)) { 2574 read = trace_get_user(&parser, ubuf, cnt, ppos);
2692 *ppos += read;
2693 ret = read;
2694 goto out;
2695 }
2696 2575
2697 while (cnt && !isspace(ch)) { 2576 if (read >= 0 && trace_parser_loaded((&parser))) {
2698 if (index < FTRACE_BUFF_MAX) 2577 parser.buffer[parser.idx] = 0;
2699 buffer[index++] = ch; 2578
2700 else { 2579 /* we allow only one expression at a time */
2701 ret = -EINVAL; 2580 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2702 goto out; 2581 parser.buffer);
2703 }
2704 ret = get_user(ch, ubuf++);
2705 if (ret) 2582 if (ret)
2706 goto out; 2583 goto out_free;
2707 read++;
2708 cnt--;
2709 } 2584 }
2710 buffer[index] = 0;
2711
2712 /* we allow only one expression at a time */
2713 ret = ftrace_set_func(array, &ftrace_graph_count, buffer);
2714 if (ret)
2715 goto out;
2716
2717 file->f_pos += read;
2718 2585
2719 ret = read; 2586 ret = read;
2720 out: 2587
2588out_free:
2589 trace_parser_put(&parser);
2590out_unlock:
2721 mutex_unlock(&graph_lock); 2591 mutex_unlock(&graph_lock);
2722 2592
2723 return ret; 2593 return ret;
2724} 2594}
2725 2595
2726static const struct file_operations ftrace_graph_fops = { 2596static const struct file_operations ftrace_graph_fops = {
2727 .open = ftrace_graph_open, 2597 .open = ftrace_graph_open,
2728 .read = seq_read, 2598 .read = seq_read,
2729 .write = ftrace_graph_write, 2599 .write = ftrace_graph_write,
2600 .release = ftrace_graph_release,
2730}; 2601};
2731#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2602#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2732 2603
@@ -3147,7 +3018,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
3147 3018
3148int 3019int
3149ftrace_enable_sysctl(struct ctl_table *table, int write, 3020ftrace_enable_sysctl(struct ctl_table *table, int write,
3150 struct file *file, void __user *buffer, size_t *lenp, 3021 void __user *buffer, size_t *lenp,
3151 loff_t *ppos) 3022 loff_t *ppos)
3152{ 3023{
3153 int ret; 3024 int ret;
@@ -3157,7 +3028,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3157 3028
3158 mutex_lock(&ftrace_lock); 3029 mutex_lock(&ftrace_lock);
3159 3030
3160 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3031 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3161 3032
3162 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) 3033 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3163 goto out; 3034 goto out;
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 1edaa9516e81..81b1645c8549 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -183,11 +183,9 @@ static void kmemtrace_stop_probes(void)
183 183
184static int kmem_trace_init(struct trace_array *tr) 184static int kmem_trace_init(struct trace_array *tr)
185{ 185{
186 int cpu;
187 kmemtrace_array = tr; 186 kmemtrace_array = tr;
188 187
189 for_each_cpu(cpu, cpu_possible_mask) 188 tracing_reset_online_cpus(tr);
190 tracing_reset(tr, cpu);
191 189
192 kmemtrace_start_probes(); 190 kmemtrace_start_probes();
193 191
@@ -239,12 +237,52 @@ struct kmemtrace_user_event_alloc {
239}; 237};
240 238
241static enum print_line_t 239static enum print_line_t
242kmemtrace_print_alloc_user(struct trace_iterator *iter, 240kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
243 struct kmemtrace_alloc_entry *entry)
244{ 241{
245 struct kmemtrace_user_event_alloc *ev_alloc;
246 struct trace_seq *s = &iter->seq; 242 struct trace_seq *s = &iter->seq;
243 struct kmemtrace_alloc_entry *entry;
244 int ret;
245
246 trace_assign_type(entry, iter->ent);
247
248 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
249 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
250 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
251 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
252 (unsigned long)entry->gfp_flags, entry->node);
253
254 if (!ret)
255 return TRACE_TYPE_PARTIAL_LINE;
256 return TRACE_TYPE_HANDLED;
257}
258
259static enum print_line_t
260kmemtrace_print_free(struct trace_iterator *iter, int flags)
261{
262 struct trace_seq *s = &iter->seq;
263 struct kmemtrace_free_entry *entry;
264 int ret;
265
266 trace_assign_type(entry, iter->ent);
267
268 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
269 entry->type_id, (void *)entry->call_site,
270 (unsigned long)entry->ptr);
271
272 if (!ret)
273 return TRACE_TYPE_PARTIAL_LINE;
274 return TRACE_TYPE_HANDLED;
275}
276
277static enum print_line_t
278kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
279{
280 struct trace_seq *s = &iter->seq;
281 struct kmemtrace_alloc_entry *entry;
247 struct kmemtrace_user_event *ev; 282 struct kmemtrace_user_event *ev;
283 struct kmemtrace_user_event_alloc *ev_alloc;
284
285 trace_assign_type(entry, iter->ent);
248 286
249 ev = trace_seq_reserve(s, sizeof(*ev)); 287 ev = trace_seq_reserve(s, sizeof(*ev));
250 if (!ev) 288 if (!ev)
@@ -271,12 +309,14 @@ kmemtrace_print_alloc_user(struct trace_iterator *iter,
271} 309}
272 310
273static enum print_line_t 311static enum print_line_t
274kmemtrace_print_free_user(struct trace_iterator *iter, 312kmemtrace_print_free_user(struct trace_iterator *iter, int flags)
275 struct kmemtrace_free_entry *entry)
276{ 313{
277 struct trace_seq *s = &iter->seq; 314 struct trace_seq *s = &iter->seq;
315 struct kmemtrace_free_entry *entry;
278 struct kmemtrace_user_event *ev; 316 struct kmemtrace_user_event *ev;
279 317
318 trace_assign_type(entry, iter->ent);
319
280 ev = trace_seq_reserve(s, sizeof(*ev)); 320 ev = trace_seq_reserve(s, sizeof(*ev));
281 if (!ev) 321 if (!ev)
282 return TRACE_TYPE_PARTIAL_LINE; 322 return TRACE_TYPE_PARTIAL_LINE;
@@ -294,12 +334,14 @@ kmemtrace_print_free_user(struct trace_iterator *iter,
294 334
295/* The two other following provide a more minimalistic output */ 335/* The two other following provide a more minimalistic output */
296static enum print_line_t 336static enum print_line_t
297kmemtrace_print_alloc_compress(struct trace_iterator *iter, 337kmemtrace_print_alloc_compress(struct trace_iterator *iter)
298 struct kmemtrace_alloc_entry *entry)
299{ 338{
339 struct kmemtrace_alloc_entry *entry;
300 struct trace_seq *s = &iter->seq; 340 struct trace_seq *s = &iter->seq;
301 int ret; 341 int ret;
302 342
343 trace_assign_type(entry, iter->ent);
344
303 /* Alloc entry */ 345 /* Alloc entry */
304 ret = trace_seq_printf(s, " + "); 346 ret = trace_seq_printf(s, " + ");
305 if (!ret) 347 if (!ret)
@@ -345,29 +387,24 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
345 if (!ret) 387 if (!ret)
346 return TRACE_TYPE_PARTIAL_LINE; 388 return TRACE_TYPE_PARTIAL_LINE;
347 389
348 /* Node */ 390 /* Node and call site*/
349 ret = trace_seq_printf(s, "%4d ", entry->node); 391 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
350 if (!ret) 392 (void *)entry->call_site);
351 return TRACE_TYPE_PARTIAL_LINE;
352
353 /* Call site */
354 ret = seq_print_ip_sym(s, entry->call_site, 0);
355 if (!ret) 393 if (!ret)
356 return TRACE_TYPE_PARTIAL_LINE; 394 return TRACE_TYPE_PARTIAL_LINE;
357 395
358 if (!trace_seq_printf(s, "\n"))
359 return TRACE_TYPE_PARTIAL_LINE;
360
361 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
362} 397}
363 398
364static enum print_line_t 399static enum print_line_t
365kmemtrace_print_free_compress(struct trace_iterator *iter, 400kmemtrace_print_free_compress(struct trace_iterator *iter)
366 struct kmemtrace_free_entry *entry)
367{ 401{
402 struct kmemtrace_free_entry *entry;
368 struct trace_seq *s = &iter->seq; 403 struct trace_seq *s = &iter->seq;
369 int ret; 404 int ret;
370 405
406 trace_assign_type(entry, iter->ent);
407
371 /* Free entry */ 408 /* Free entry */
372 ret = trace_seq_printf(s, " - "); 409 ret = trace_seq_printf(s, " - ");
373 if (!ret) 410 if (!ret)
@@ -401,19 +438,11 @@ kmemtrace_print_free_compress(struct trace_iterator *iter,
401 if (!ret) 438 if (!ret)
402 return TRACE_TYPE_PARTIAL_LINE; 439 return TRACE_TYPE_PARTIAL_LINE;
403 440
404 /* Skip node */ 441 /* Skip node and print call site*/
405 ret = trace_seq_printf(s, " "); 442 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
406 if (!ret) 443 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE; 444 return TRACE_TYPE_PARTIAL_LINE;
408 445
409 /* Call site */
410 ret = seq_print_ip_sym(s, entry->call_site, 0);
411 if (!ret)
412 return TRACE_TYPE_PARTIAL_LINE;
413
414 if (!trace_seq_printf(s, "\n"))
415 return TRACE_TYPE_PARTIAL_LINE;
416
417 return TRACE_TYPE_HANDLED; 446 return TRACE_TYPE_HANDLED;
418} 447}
419 448
@@ -421,32 +450,31 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
421{ 450{
422 struct trace_entry *entry = iter->ent; 451 struct trace_entry *entry = iter->ent;
423 452
424 switch (entry->type) { 453 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
425 case TRACE_KMEM_ALLOC: { 454 return TRACE_TYPE_UNHANDLED;
426 struct kmemtrace_alloc_entry *field;
427
428 trace_assign_type(field, entry);
429 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
430 return kmemtrace_print_alloc_compress(iter, field);
431 else
432 return kmemtrace_print_alloc_user(iter, field);
433 }
434
435 case TRACE_KMEM_FREE: {
436 struct kmemtrace_free_entry *field;
437
438 trace_assign_type(field, entry);
439 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
440 return kmemtrace_print_free_compress(iter, field);
441 else
442 return kmemtrace_print_free_user(iter, field);
443 }
444 455
456 switch (entry->type) {
457 case TRACE_KMEM_ALLOC:
458 return kmemtrace_print_alloc_compress(iter);
459 case TRACE_KMEM_FREE:
460 return kmemtrace_print_free_compress(iter);
445 default: 461 default:
446 return TRACE_TYPE_UNHANDLED; 462 return TRACE_TYPE_UNHANDLED;
447 } 463 }
448} 464}
449 465
466static struct trace_event kmem_trace_alloc = {
467 .type = TRACE_KMEM_ALLOC,
468 .trace = kmemtrace_print_alloc,
469 .binary = kmemtrace_print_alloc_user,
470};
471
472static struct trace_event kmem_trace_free = {
473 .type = TRACE_KMEM_FREE,
474 .trace = kmemtrace_print_free,
475 .binary = kmemtrace_print_free_user,
476};
477
450static struct tracer kmem_tracer __read_mostly = { 478static struct tracer kmem_tracer __read_mostly = {
451 .name = "kmemtrace", 479 .name = "kmemtrace",
452 .init = kmem_trace_init, 480 .init = kmem_trace_init,
@@ -463,6 +491,21 @@ void kmemtrace_init(void)
463 491
464static int __init init_kmem_tracer(void) 492static int __init init_kmem_tracer(void)
465{ 493{
466 return register_tracer(&kmem_tracer); 494 if (!register_ftrace_event(&kmem_trace_alloc)) {
495 pr_warning("Warning: could not register kmem events\n");
496 return 1;
497 }
498
499 if (!register_ftrace_event(&kmem_trace_free)) {
500 pr_warning("Warning: could not register kmem events\n");
501 return 1;
502 }
503
504 if (!register_tracer(&kmem_tracer)) {
505 pr_warning("Warning: could not register the kmem tracer\n");
506 return 1;
507 }
508
509 return 0;
467} 510}
468device_initcall(init_kmem_tracer); 511device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
new file mode 100644
index 000000000000..e06c6e3d56a3
--- /dev/null
+++ b/kernel/trace/power-traces.c
@@ -0,0 +1,20 @@
1/*
2 * Power trace points
3 *
4 * Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com>
5 */
6
7#include <linux/string.h>
8#include <linux/types.h>
9#include <linux/workqueue.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12#include <linux/slab.h>
13
14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h>
16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20
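power-traces.c is the one translation unit that turns the declarations in <trace/events/power.h> into real tracepoint instances (CREATE_TRACE_POINTS) and exports them for modules. As a hedged, generic illustration of that convention, with an entirely made-up event rather than the actual contents of power.h:

/* include/trace/events/example.h -- hypothetical event header */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM example

#if !defined(_TRACE_EXAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXAMPLE_H

#include <linux/tracepoint.h>

TRACE_EVENT(example_event,
	TP_PROTO(unsigned int value),
	TP_ARGS(value),
	TP_STRUCT__entry(
		__field(unsigned int, value)
	),
	TP_fast_assign(
		__entry->value = value;
	),
	TP_printk("value=%u", __entry->value)
);

#endif /* _TRACE_EXAMPLE_H */

/* This part must be outside protection */
#include <trace/define_trace.h>

/* example-traces.c -- mirrors power-traces.c above */
#define CREATE_TRACE_POINTS
#include <trace/events/example.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(example_event);

Any other file then just includes the header (without CREATE_TRACE_POINTS) and fires the event with trace_example_event(42);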
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bf27bb7a63e2..d4ff01970547 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -201,8 +201,6 @@ int tracing_is_on(void)
201} 201}
202EXPORT_SYMBOL_GPL(tracing_is_on); 202EXPORT_SYMBOL_GPL(tracing_is_on);
203 203
204#include "trace.h"
205
206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 204#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
207#define RB_ALIGNMENT 4U 205#define RB_ALIGNMENT 4U
208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 206#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
@@ -218,17 +216,12 @@ enum {
218 216
219static inline int rb_null_event(struct ring_buffer_event *event) 217static inline int rb_null_event(struct ring_buffer_event *event)
220{ 218{
221 return event->type_len == RINGBUF_TYPE_PADDING 219 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
222 && event->time_delta == 0;
223}
224
225static inline int rb_discarded_event(struct ring_buffer_event *event)
226{
227 return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
228} 220}
229 221
230static void rb_event_set_padding(struct ring_buffer_event *event) 222static void rb_event_set_padding(struct ring_buffer_event *event)
231{ 223{
224 /* padding has a NULL time_delta */
232 event->type_len = RINGBUF_TYPE_PADDING; 225 event->type_len = RINGBUF_TYPE_PADDING;
233 event->time_delta = 0; 226 event->time_delta = 0;
234} 227}
@@ -322,6 +315,14 @@ struct buffer_data_page {
322 unsigned char data[]; /* data of buffer page */ 315 unsigned char data[]; /* data of buffer page */
323}; 316};
324 317
318/*
319 * Note, the buffer_page list must be first. The buffer pages
320 * are allocated in cache lines, which means that each buffer
321 * page will be at the beginning of a cache line, and thus
322 * the least significant bits will be zero. We use this to
323 * add flags in the list struct pointers, to make the ring buffer
324 * lockless.
325 */
325struct buffer_page { 326struct buffer_page {
326 struct list_head list; /* list of buffer pages */ 327 struct list_head list; /* list of buffer pages */
327 local_t write; /* index for next write */ 328 local_t write; /* index for next write */
@@ -330,6 +331,21 @@ struct buffer_page {
330 struct buffer_data_page *page; /* Actual data page */ 331 struct buffer_data_page *page; /* Actual data page */
331}; 332};
332 333
334/*
335 * The buffer page counters, write and entries, must be reset
336 * atomically when crossing page boundaries. To synchronize this
337 * update, two counters are inserted into the number. One is
338 * the actual counter for the write position or count on the page.
339 *
340 * The other is a counter of updaters. Before an update happens
341 * the update partition of the counter is incremented. This will
342 * allow the updater to update the counter atomically.
343 *
344 * The counter is 20 bits, and the state data is 12.
345 */
346#define RB_WRITE_MASK 0xfffff
347#define RB_WRITE_INTCNT (1 << 20)
348
333static void rb_init_page(struct buffer_data_page *bpage) 349static void rb_init_page(struct buffer_data_page *bpage)
334{ 350{
335 local_set(&bpage->commit, 0); 351 local_set(&bpage->commit, 0);
@@ -403,21 +419,20 @@ int ring_buffer_print_page_header(struct trace_seq *s)
403struct ring_buffer_per_cpu { 419struct ring_buffer_per_cpu {
404 int cpu; 420 int cpu;
405 struct ring_buffer *buffer; 421 struct ring_buffer *buffer;
406 spinlock_t reader_lock; /* serialize readers */ 422 spinlock_t reader_lock; /* serialize readers */
407 raw_spinlock_t lock; 423 raw_spinlock_t lock;
408 struct lock_class_key lock_key; 424 struct lock_class_key lock_key;
409 struct list_head pages; 425 struct list_head *pages;
410 struct buffer_page *head_page; /* read from head */ 426 struct buffer_page *head_page; /* read from head */
411 struct buffer_page *tail_page; /* write to tail */ 427 struct buffer_page *tail_page; /* write to tail */
412 struct buffer_page *commit_page; /* committed pages */ 428 struct buffer_page *commit_page; /* committed pages */
413 struct buffer_page *reader_page; 429 struct buffer_page *reader_page;
414 unsigned long nmi_dropped; 430 local_t commit_overrun;
415 unsigned long commit_overrun; 431 local_t overrun;
416 unsigned long overrun;
417 unsigned long read;
418 local_t entries; 432 local_t entries;
419 local_t committing; 433 local_t committing;
420 local_t commits; 434 local_t commits;
435 unsigned long read;
421 u64 write_stamp; 436 u64 write_stamp;
422 u64 read_stamp; 437 u64 read_stamp;
423 atomic_t record_disabled; 438 atomic_t record_disabled;
@@ -450,14 +465,19 @@ struct ring_buffer_iter {
450}; 465};
451 466
452/* buffer may be either ring_buffer or ring_buffer_per_cpu */ 467/* buffer may be either ring_buffer or ring_buffer_per_cpu */
453#define RB_WARN_ON(buffer, cond) \ 468#define RB_WARN_ON(b, cond) \
454 ({ \ 469 ({ \
455 int _____ret = unlikely(cond); \ 470 int _____ret = unlikely(cond); \
456 if (_____ret) { \ 471 if (_____ret) { \
457 atomic_inc(&buffer->record_disabled); \ 472 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
458 WARN_ON(1); \ 473 struct ring_buffer_per_cpu *__b = \
459 } \ 474 (void *)b; \
460 _____ret; \ 475 atomic_inc(&__b->buffer->record_disabled); \
476 } else \
477 atomic_inc(&b->record_disabled); \
478 WARN_ON(1); \
479 } \
480 _____ret; \
461 }) 481 })
462 482
463/* Up this if you want to test the TIME_EXTENTS and normalization */ 483/* Up this if you want to test the TIME_EXTENTS and normalization */
@@ -489,6 +509,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
489} 509}
490EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 510EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
491 511
512/*
513 * Making the ring buffer lockless makes things tricky.
514 * Although writes only happen on the CPU that they are on,
515 * and they only need to worry about interrupts. Reads can
516 * happen on any CPU.
517 *
518 * The reader page is always off the ring buffer, but when the
519 * reader finishes with a page, it needs to swap its page with
520 * a new one from the buffer. The reader needs to take from
521 * the head (writes go to the tail). But if a writer is in overwrite
522 * mode and wraps, it must push the head page forward.
523 *
524 * Here lies the problem.
525 *
526 * The reader must be careful to replace only the head page, and
527 * not another one. As described at the top of the file in the
528 * ASCII art, the reader sets its old page to point to the next
529 * page after head. It then sets the page after head to point to
530 * the old reader page. But if the writer moves the head page
531 * during this operation, the reader could end up with the tail.
532 *
533 * We use cmpxchg to help prevent this race. We also do something
534 * special with the page before head. We set the LSB to 1.
535 *
536 * When the writer must push the page forward, it will clear the
537 * bit that points to the head page, move the head, and then set
538 * the bit that points to the new head page.
539 *
540 * We also don't want an interrupt coming in and moving the head
541 * page on another writer. Thus we use the second LSB to catch
542 * that too. Thus:
543 *
544 * head->list->prev->next bit 1 bit 0
545 * ------- -------
546 * Normal page 0 0
547 * Points to head page 0 1
548 * New head page 1 0
549 *
550 * Note we can not trust the prev pointer of the head page, because:
551 *
552 * +----+ +-----+ +-----+
553 * | |------>| T |---X--->| N |
554 * | |<------| | | |
555 * +----+ +-----+ +-----+
556 * ^ ^ |
557 * | +-----+ | |
558 * +----------| R |----------+ |
559 * | |<-----------+
560 * +-----+
561 *
562 * Key: ---X--> HEAD flag set in pointer
563 * T Tail page
564 * R Reader page
565 * N Next page
566 *
567 * (see __rb_reserve_next() to see where this happens)
568 *
569 * What the above shows is that the reader just swapped out
570 * the reader page with a page in the buffer, but before it
571 * could make the new header point back to the new page added
572 * it was preempted by a writer. The writer moved forward onto
573 * the new page added by the reader and is about to move forward
574 * again.
575 *
576 * You can see, it is legitimate for the previous pointer of
577 * the head (or any page) not to point back to itself. But only
578 * temporarially.
579 */
580
581#define RB_PAGE_NORMAL 0UL
582#define RB_PAGE_HEAD 1UL
583#define RB_PAGE_UPDATE 2UL
584
585
586#define RB_FLAG_MASK 3UL
587
588/* PAGE_MOVED is not part of the mask */
589#define RB_PAGE_MOVED 4UL
590
591/*
592 * rb_list_head - remove any bit
593 */
594static struct list_head *rb_list_head(struct list_head *list)
595{
596 unsigned long val = (unsigned long)list;
597
598 return (struct list_head *)(val & ~RB_FLAG_MASK);
599}
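The flag scheme described in the comment above relies on list pointers being at least 4-byte aligned, so bits 0 and 1 are free to carry state. A standalone toy program (not kernel code) showing the same encode/decode arithmetic that rb_list_head() strips and rb_set_list_to_head() (further down) applies:

#include <stdio.h>

#define RB_PAGE_NORMAL 0UL
#define RB_PAGE_HEAD   1UL	/* "next points to the head page" */
#define RB_PAGE_UPDATE 2UL	/* "a writer is moving the head" */
#define RB_FLAG_MASK   3UL

int main(void)
{
	/* stand-in for an aligned struct list_head address */
	unsigned long next = 0x100040;

	/* tag the pointer the way rb_set_list_to_head() does */
	unsigned long tagged = (next & ~RB_PAGE_UPDATE) | RB_PAGE_HEAD;

	printf("flags   : %lu\n", tagged & RB_FLAG_MASK);   /* 1 -> head */
	printf("pointer : %#lx\n", tagged & ~RB_FLAG_MASK); /* 0x100040 */
	return 0;
}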
600
601/*
602 * rb_is_head_page - test if the give page is the head page
603 *
604 * Because the reader may move the head_page pointer, we can
605 * not trust what the head page is (it may be pointing to
606 * the reader page). But if the next page is a header page,
607 * its flags will be non zero.
608 */
609static int inline
610rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
611 struct buffer_page *page, struct list_head *list)
612{
613 unsigned long val;
614
615 val = (unsigned long)list->next;
616
617 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
618 return RB_PAGE_MOVED;
619
620 return val & RB_FLAG_MASK;
621}
622
623/*
624 * rb_is_reader_page
625 *
626 * The unique thing about the reader page, is that, if the
627 * writer is ever on it, the previous pointer never points
628 * back to the reader page.
629 */
630static int rb_is_reader_page(struct buffer_page *page)
631{
632 struct list_head *list = page->list.prev;
633
634 return rb_list_head(list->next) != &page->list;
635}
636
637/*
638 * rb_set_list_to_head - set a list_head to be pointing to head.
639 */
640static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
641 struct list_head *list)
642{
643 unsigned long *ptr;
644
645 ptr = (unsigned long *)&list->next;
646 *ptr |= RB_PAGE_HEAD;
647 *ptr &= ~RB_PAGE_UPDATE;
648}
649
650/*
651 * rb_head_page_activate - sets up head page
652 */
653static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
654{
655 struct buffer_page *head;
656
657 head = cpu_buffer->head_page;
658 if (!head)
659 return;
660
661 /*
662 * Set the previous list pointer to have the HEAD flag.
663 */
664 rb_set_list_to_head(cpu_buffer, head->list.prev);
665}
666
667static void rb_list_head_clear(struct list_head *list)
668{
669 unsigned long *ptr = (unsigned long *)&list->next;
670
671 *ptr &= ~RB_FLAG_MASK;
672}
673
674/*
675 * rb_head_page_dactivate - clears head page ptr (for free list)
676 */
677static void
678rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
679{
680 struct list_head *hd;
681
682 /* Go through the whole list and clear any pointers found. */
683 rb_list_head_clear(cpu_buffer->pages);
684
685 list_for_each(hd, cpu_buffer->pages)
686 rb_list_head_clear(hd);
687}
688
689static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
690 struct buffer_page *head,
691 struct buffer_page *prev,
692 int old_flag, int new_flag)
693{
694 struct list_head *list;
695 unsigned long val = (unsigned long)&head->list;
696 unsigned long ret;
697
698 list = &prev->list;
699
700 val &= ~RB_FLAG_MASK;
701
702 ret = cmpxchg((unsigned long *)&list->next,
703 val | old_flag, val | new_flag);
704
705 /* check if the reader took the page */
706 if ((ret & ~RB_FLAG_MASK) != val)
707 return RB_PAGE_MOVED;
708
709 return ret & RB_FLAG_MASK;
710}
711
712static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
713 struct buffer_page *head,
714 struct buffer_page *prev,
715 int old_flag)
716{
717 return rb_head_page_set(cpu_buffer, head, prev,
718 old_flag, RB_PAGE_UPDATE);
719}
720
721static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
722 struct buffer_page *head,
723 struct buffer_page *prev,
724 int old_flag)
725{
726 return rb_head_page_set(cpu_buffer, head, prev,
727 old_flag, RB_PAGE_HEAD);
728}
729
730static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
731 struct buffer_page *head,
732 struct buffer_page *prev,
733 int old_flag)
734{
735 return rb_head_page_set(cpu_buffer, head, prev,
736 old_flag, RB_PAGE_NORMAL);
737}
738
739static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
740 struct buffer_page **bpage)
741{
742 struct list_head *p = rb_list_head((*bpage)->list.next);
743
744 *bpage = list_entry(p, struct buffer_page, list);
745}
746
747static struct buffer_page *
748rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
749{
750 struct buffer_page *head;
751 struct buffer_page *page;
752 struct list_head *list;
753 int i;
754
755 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
756 return NULL;
757
758 /* sanity check */
759 list = cpu_buffer->pages;
760 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
761 return NULL;
762
763 page = head = cpu_buffer->head_page;
764 /*
 765 * It is possible that the writer moves the head page behind
 766 * where we started, and we miss it in one loop.
 767 * A second loop should grab the head page, but we'll do
768 * three loops just because I'm paranoid.
769 */
770 for (i = 0; i < 3; i++) {
771 do {
772 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
773 cpu_buffer->head_page = page;
774 return page;
775 }
776 rb_inc_page(cpu_buffer, &page);
777 } while (page != head);
778 }
779
780 RB_WARN_ON(cpu_buffer, 1);
781
782 return NULL;
783}
784
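rb_set_head_page, just above, finds the head by walking the ring and asking which link carries the HEAD tag (the kernel checks page->list.prev->next; the sketch below walks forward and tests each outgoing link, which is the same link seen from the previous page). Hypothetical names again, plus a fixed bound on the walk in the same spirit as the three-pass loop:

#include <stdint.h>
#include <stdio.h>

#define FLAG_MASK 3UL
#define PG_HEAD   1UL

struct node { struct node *next; };

static struct node *strip(struct node *p)
{
        return (struct node *)((uintptr_t)p & ~FLAG_MASK);
}

/* walk the ring from `start` until we find the page whose incoming
 * link carries the HEAD tag; bounded so a broken ring cannot spin forever */
static struct node *find_head(struct node *start, int max)
{
        struct node *prev = start;

        while (max--) {
                struct node *next = strip(prev->next);

                if ((uintptr_t)prev->next & PG_HEAD)
                        return next;
                prev = next;
        }
        return NULL;
}

int main(void)
{
        static struct node a, b, c;

        a.next = &b;
        b.next = (struct node *)((uintptr_t)&c | PG_HEAD);  /* c is the head */
        c.next = &a;

        printf("%d\n", find_head(&a, 6) == &c);
        return 0;
}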
785static int rb_head_page_replace(struct buffer_page *old,
786 struct buffer_page *new)
787{
788 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
789 unsigned long val;
790 unsigned long ret;
791
792 val = *ptr & ~RB_FLAG_MASK;
793 val |= RB_PAGE_HEAD;
794
795 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
796
797 return ret == val;
798}
799
800/*
801 * rb_tail_page_update - move the tail page forward
802 *
803 * Returns 1 if moved tail page, 0 if someone else did.
804 */
805static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
806 struct buffer_page *tail_page,
807 struct buffer_page *next_page)
808{
809 struct buffer_page *old_tail;
810 unsigned long old_entries;
811 unsigned long old_write;
812 int ret = 0;
813
814 /*
815 * The tail page now needs to be moved forward.
816 *
817 * We need to reset the tail page, but without messing
818 * with possible erasing of data brought in by interrupts
819 * that have moved the tail page and are currently on it.
820 *
821 * We add a counter to the write field to denote this.
822 */
823 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
824 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
825
826 /*
827 * Just make sure we have seen our old_write and synchronize
828 * with any interrupts that come in.
829 */
830 barrier();
831
832 /*
833 * If the tail page is still the same as what we think
834 * it is, then it is up to us to update the tail
835 * pointer.
836 */
837 if (tail_page == cpu_buffer->tail_page) {
838 /* Zero the write counter */
839 unsigned long val = old_write & ~RB_WRITE_MASK;
840 unsigned long eval = old_entries & ~RB_WRITE_MASK;
841
842 /*
 843 * This will only succeed if an interrupt did
 844 * not come in and change it, in which case we
 845 * do not want to modify it.
846 *
847 * We add (void) to let the compiler know that we do not care
848 * about the return value of these functions. We use the
849 * cmpxchg to only update if an interrupt did not already
850 * do it for us. If the cmpxchg fails, we don't care.
851 */
852 (void)local_cmpxchg(&next_page->write, old_write, val);
853 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
854
855 /*
 856 * No need to worry about races with clearing out the commit:
 857 * it can only increment when a commit takes place. But that
 858 * only happens in the outermost nested commit.
859 */
860 local_set(&next_page->page->commit, 0);
861
862 old_tail = cmpxchg(&cpu_buffer->tail_page,
863 tail_page, next_page);
864
865 if (old_tail == tail_page)
866 ret = 1;
867 }
868
869 return ret;
870}
871
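rb_tail_page_update splits the write and entries fields in two: the low bits (RB_WRITE_MASK) are the real index or count, and everything above is a stamp bumped by RB_WRITE_INTCNT, so the later local_cmpxchg that zeroes the index can only succeed if no interrupt touched the field in between. A standalone sketch of that split; the 20-bit width is an assumption for the sketch rather than something shown in the patch context:

#include <stdio.h>

#define WRITE_BITS 20UL                        /* assumed width for the sketch */
#define WRITE_MASK ((1UL << WRITE_BITS) - 1)   /* in the spirit of RB_WRITE_MASK */
#define INTCNT     (1UL << WRITE_BITS)         /* in the spirit of RB_WRITE_INTCNT */

int main(void)
{
        unsigned long write = 1500;     /* current write index on the page */

        /* moving the tail: stamp the upper bits and remember the result */
        unsigned long old = (write += INTCNT);

        /* zero the index but keep the stamp */
        unsigned long val = old & ~WRITE_MASK;

        /* the kernel does this with local_cmpxchg(); single-threaded, a
         * plain compare shows the intent: only reset if nothing else
         * touched the field since we stamped it */
        if (write == old)
                write = val;

        printf("index=%lu stamp=%lu\n", write & WRITE_MASK, write >> WRITE_BITS);
        return 0;
}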
872static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
873 struct buffer_page *bpage)
874{
875 unsigned long val = (unsigned long)bpage;
876
877 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
878 return 1;
879
880 return 0;
881}
882
883/**
884 * rb_check_list - make sure a pointer to a list has the last bits zero
885 */
886static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
887 struct list_head *list)
888{
889 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
890 return 1;
891 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
892 return 1;
893 return 0;
894}
895
492/** 896/**
493 * check_pages - integrity check of buffer pages 897 * check_pages - integrity check of buffer pages
494 * @cpu_buffer: CPU buffer with pages to test 898 * @cpu_buffer: CPU buffer with pages to test
@@ -498,14 +902,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
498 */ 902 */
499static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 903static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
500{ 904{
501 struct list_head *head = &cpu_buffer->pages; 905 struct list_head *head = cpu_buffer->pages;
502 struct buffer_page *bpage, *tmp; 906 struct buffer_page *bpage, *tmp;
503 907
908 rb_head_page_deactivate(cpu_buffer);
909
504 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 910 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
505 return -1; 911 return -1;
506 if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 912 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
507 return -1; 913 return -1;
508 914
915 if (rb_check_list(cpu_buffer, head))
916 return -1;
917
509 list_for_each_entry_safe(bpage, tmp, head, list) { 918 list_for_each_entry_safe(bpage, tmp, head, list) {
510 if (RB_WARN_ON(cpu_buffer, 919 if (RB_WARN_ON(cpu_buffer,
511 bpage->list.next->prev != &bpage->list)) 920 bpage->list.next->prev != &bpage->list))
@@ -513,25 +922,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
513 if (RB_WARN_ON(cpu_buffer, 922 if (RB_WARN_ON(cpu_buffer,
514 bpage->list.prev->next != &bpage->list)) 923 bpage->list.prev->next != &bpage->list))
515 return -1; 924 return -1;
925 if (rb_check_list(cpu_buffer, &bpage->list))
926 return -1;
516 } 927 }
517 928
929 rb_head_page_activate(cpu_buffer);
930
518 return 0; 931 return 0;
519} 932}
520 933
521static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 934static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
522 unsigned nr_pages) 935 unsigned nr_pages)
523{ 936{
524 struct list_head *head = &cpu_buffer->pages;
525 struct buffer_page *bpage, *tmp; 937 struct buffer_page *bpage, *tmp;
526 unsigned long addr; 938 unsigned long addr;
527 LIST_HEAD(pages); 939 LIST_HEAD(pages);
528 unsigned i; 940 unsigned i;
529 941
942 WARN_ON(!nr_pages);
943
530 for (i = 0; i < nr_pages; i++) { 944 for (i = 0; i < nr_pages; i++) {
531 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 945 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
532 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 946 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
533 if (!bpage) 947 if (!bpage)
534 goto free_pages; 948 goto free_pages;
949
950 rb_check_bpage(cpu_buffer, bpage);
951
535 list_add(&bpage->list, &pages); 952 list_add(&bpage->list, &pages);
536 953
537 addr = __get_free_page(GFP_KERNEL); 954 addr = __get_free_page(GFP_KERNEL);
@@ -541,7 +958,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
541 rb_init_page(bpage->page); 958 rb_init_page(bpage->page);
542 } 959 }
543 960
544 list_splice(&pages, head); 961 /*
962 * The ring buffer page list is a circular list that does not
963 * start and end with a list head. All page list items point to
964 * other pages.
965 */
966 cpu_buffer->pages = pages.next;
967 list_del(&pages);
545 968
546 rb_check_pages(cpu_buffer); 969 rb_check_pages(cpu_buffer);
547 970
@@ -573,13 +996,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
573 spin_lock_init(&cpu_buffer->reader_lock); 996 spin_lock_init(&cpu_buffer->reader_lock);
574 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 997 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
575 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 998 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
576 INIT_LIST_HEAD(&cpu_buffer->pages);
577 999
578 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1000 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
579 GFP_KERNEL, cpu_to_node(cpu)); 1001 GFP_KERNEL, cpu_to_node(cpu));
580 if (!bpage) 1002 if (!bpage)
581 goto fail_free_buffer; 1003 goto fail_free_buffer;
582 1004
1005 rb_check_bpage(cpu_buffer, bpage);
1006
583 cpu_buffer->reader_page = bpage; 1007 cpu_buffer->reader_page = bpage;
584 addr = __get_free_page(GFP_KERNEL); 1008 addr = __get_free_page(GFP_KERNEL);
585 if (!addr) 1009 if (!addr)
@@ -594,9 +1018,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
594 goto fail_free_reader; 1018 goto fail_free_reader;
595 1019
596 cpu_buffer->head_page 1020 cpu_buffer->head_page
597 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 1021 = list_entry(cpu_buffer->pages, struct buffer_page, list);
598 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 1022 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
599 1023
1024 rb_head_page_activate(cpu_buffer);
1025
600 return cpu_buffer; 1026 return cpu_buffer;
601 1027
602 fail_free_reader: 1028 fail_free_reader:
@@ -609,15 +1035,22 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
609 1035
610static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 1036static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
611{ 1037{
612 struct list_head *head = &cpu_buffer->pages; 1038 struct list_head *head = cpu_buffer->pages;
613 struct buffer_page *bpage, *tmp; 1039 struct buffer_page *bpage, *tmp;
614 1040
615 free_buffer_page(cpu_buffer->reader_page); 1041 free_buffer_page(cpu_buffer->reader_page);
616 1042
617 list_for_each_entry_safe(bpage, tmp, head, list) { 1043 rb_head_page_deactivate(cpu_buffer);
618 list_del_init(&bpage->list); 1044
1045 if (head) {
1046 list_for_each_entry_safe(bpage, tmp, head, list) {
1047 list_del_init(&bpage->list);
1048 free_buffer_page(bpage);
1049 }
1050 bpage = list_entry(head, struct buffer_page, list);
619 free_buffer_page(bpage); 1051 free_buffer_page(bpage);
620 } 1052 }
1053
621 kfree(cpu_buffer); 1054 kfree(cpu_buffer);
622} 1055}
623 1056
@@ -735,6 +1168,7 @@ ring_buffer_free(struct ring_buffer *buffer)
735 1168
736 put_online_cpus(); 1169 put_online_cpus();
737 1170
1171 kfree(buffer->buffers);
738 free_cpumask_var(buffer->cpumask); 1172 free_cpumask_var(buffer->cpumask);
739 1173
740 kfree(buffer); 1174 kfree(buffer);
@@ -759,15 +1193,17 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
759 atomic_inc(&cpu_buffer->record_disabled); 1193 atomic_inc(&cpu_buffer->record_disabled);
760 synchronize_sched(); 1194 synchronize_sched();
761 1195
1196 rb_head_page_deactivate(cpu_buffer);
1197
762 for (i = 0; i < nr_pages; i++) { 1198 for (i = 0; i < nr_pages; i++) {
763 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1199 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
764 return; 1200 return;
765 p = cpu_buffer->pages.next; 1201 p = cpu_buffer->pages->next;
766 bpage = list_entry(p, struct buffer_page, list); 1202 bpage = list_entry(p, struct buffer_page, list);
767 list_del_init(&bpage->list); 1203 list_del_init(&bpage->list);
768 free_buffer_page(bpage); 1204 free_buffer_page(bpage);
769 } 1205 }
770 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1206 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
771 return; 1207 return;
772 1208
773 rb_reset_cpu(cpu_buffer); 1209 rb_reset_cpu(cpu_buffer);
@@ -789,15 +1225,19 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
789 atomic_inc(&cpu_buffer->record_disabled); 1225 atomic_inc(&cpu_buffer->record_disabled);
790 synchronize_sched(); 1226 synchronize_sched();
791 1227
1228 spin_lock_irq(&cpu_buffer->reader_lock);
1229 rb_head_page_deactivate(cpu_buffer);
1230
792 for (i = 0; i < nr_pages; i++) { 1231 for (i = 0; i < nr_pages; i++) {
793 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1232 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
794 return; 1233 return;
795 p = pages->next; 1234 p = pages->next;
796 bpage = list_entry(p, struct buffer_page, list); 1235 bpage = list_entry(p, struct buffer_page, list);
797 list_del_init(&bpage->list); 1236 list_del_init(&bpage->list);
798 list_add_tail(&bpage->list, &cpu_buffer->pages); 1237 list_add_tail(&bpage->list, cpu_buffer->pages);
799 } 1238 }
800 rb_reset_cpu(cpu_buffer); 1239 rb_reset_cpu(cpu_buffer);
1240 spin_unlock_irq(&cpu_buffer->reader_lock);
801 1241
802 rb_check_pages(cpu_buffer); 1242 rb_check_pages(cpu_buffer);
803 1243
@@ -948,21 +1388,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
948} 1388}
949 1389
950static inline struct ring_buffer_event * 1390static inline struct ring_buffer_event *
951rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
952{
953 return __rb_page_index(cpu_buffer->head_page,
954 cpu_buffer->head_page->read);
955}
956
957static inline struct ring_buffer_event *
958rb_iter_head_event(struct ring_buffer_iter *iter) 1391rb_iter_head_event(struct ring_buffer_iter *iter)
959{ 1392{
960 return __rb_page_index(iter->head_page, iter->head); 1393 return __rb_page_index(iter->head_page, iter->head);
961} 1394}
962 1395
963static inline unsigned rb_page_write(struct buffer_page *bpage) 1396static inline unsigned long rb_page_write(struct buffer_page *bpage)
964{ 1397{
965 return local_read(&bpage->write); 1398 return local_read(&bpage->write) & RB_WRITE_MASK;
966} 1399}
967 1400
968static inline unsigned rb_page_commit(struct buffer_page *bpage) 1401static inline unsigned rb_page_commit(struct buffer_page *bpage)
@@ -970,6 +1403,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage)
970 return local_read(&bpage->page->commit); 1403 return local_read(&bpage->page->commit);
971} 1404}
972 1405
1406static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1407{
1408 return local_read(&bpage->entries) & RB_WRITE_MASK;
1409}
1410
 973/* Size is determined by what has been committed */ 1411
974static inline unsigned rb_page_size(struct buffer_page *bpage) 1412static inline unsigned rb_page_size(struct buffer_page *bpage)
975{ 1413{
@@ -982,22 +1420,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
982 return rb_page_commit(cpu_buffer->commit_page); 1420 return rb_page_commit(cpu_buffer->commit_page);
983} 1421}
984 1422
985static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
986{
987 return rb_page_commit(cpu_buffer->head_page);
988}
989
990static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
991 struct buffer_page **bpage)
992{
993 struct list_head *p = (*bpage)->list.next;
994
995 if (p == &cpu_buffer->pages)
996 p = p->next;
997
998 *bpage = list_entry(p, struct buffer_page, list);
999}
1000
1001static inline unsigned 1423static inline unsigned
1002rb_event_index(struct ring_buffer_event *event) 1424rb_event_index(struct ring_buffer_event *event)
1003{ 1425{
@@ -1023,6 +1445,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1023static void 1445static void
1024rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 1446rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1025{ 1447{
1448 unsigned long max_count;
1449
1026 /* 1450 /*
1027 * We only race with interrupts and NMIs on this CPU. 1451 * We only race with interrupts and NMIs on this CPU.
1028 * If we own the commit event, then we can commit 1452 * If we own the commit event, then we can commit
@@ -1032,9 +1456,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1032 * assign the commit to the tail. 1456 * assign the commit to the tail.
1033 */ 1457 */
1034 again: 1458 again:
1459 max_count = cpu_buffer->buffer->pages * 100;
1460
1035 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1461 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1036 cpu_buffer->commit_page->page->commit = 1462 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1037 cpu_buffer->commit_page->write; 1463 return;
1464 if (RB_WARN_ON(cpu_buffer,
1465 rb_is_reader_page(cpu_buffer->tail_page)))
1466 return;
1467 local_set(&cpu_buffer->commit_page->page->commit,
1468 rb_page_write(cpu_buffer->commit_page));
1038 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 1469 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1039 cpu_buffer->write_stamp = 1470 cpu_buffer->write_stamp =
1040 cpu_buffer->commit_page->page->time_stamp; 1471 cpu_buffer->commit_page->page->time_stamp;
@@ -1043,8 +1474,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1043 } 1474 }
1044 while (rb_commit_index(cpu_buffer) != 1475 while (rb_commit_index(cpu_buffer) !=
1045 rb_page_write(cpu_buffer->commit_page)) { 1476 rb_page_write(cpu_buffer->commit_page)) {
1046 cpu_buffer->commit_page->page->commit = 1477
1047 cpu_buffer->commit_page->write; 1478 local_set(&cpu_buffer->commit_page->page->commit,
1479 rb_page_write(cpu_buffer->commit_page));
1480 RB_WARN_ON(cpu_buffer,
1481 local_read(&cpu_buffer->commit_page->page->commit) &
1482 ~RB_WRITE_MASK);
1048 barrier(); 1483 barrier();
1049 } 1484 }
1050 1485
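The catch-up loop in rb_set_commit_to_write now carries a sanity bound (buffer->pages * 100) so a logic bug cannot turn into a silent infinite loop; it warns and bails instead. A toy sketch of that bounded advance with made-up page indices, just to show the loop shape:

#include <stdio.h>

#define NPAGES 8

int main(void)
{
        int commit = 2, tail = 6;           /* made-up page indices */
        long max_count = NPAGES * 100;      /* same spirit as buffer->pages * 100 */

        while (commit != tail) {
                if (!--max_count) {         /* should never fire; bail instead of spinning */
                        fprintf(stderr, "commit never caught the tail\n");
                        return 1;
                }
                commit = (commit + 1) % NPAGES;   /* rb_inc_page analogue */
        }
        printf("commit caught up at page %d\n", commit);
        return 0;
}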
@@ -1077,7 +1512,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1077 * to the head page instead of next. 1512 * to the head page instead of next.
1078 */ 1513 */
1079 if (iter->head_page == cpu_buffer->reader_page) 1514 if (iter->head_page == cpu_buffer->reader_page)
1080 iter->head_page = cpu_buffer->head_page; 1515 iter->head_page = rb_set_head_page(cpu_buffer);
1081 else 1516 else
1082 rb_inc_page(cpu_buffer, &iter->head_page); 1517 rb_inc_page(cpu_buffer, &iter->head_page);
1083 1518
@@ -1121,6 +1556,163 @@ rb_update_event(struct ring_buffer_event *event,
1121 } 1556 }
1122} 1557}
1123 1558
1559/*
1560 * rb_handle_head_page - writer hit the head page
1561 *
1562 * Returns: +1 to retry page
1563 * 0 to continue
1564 * -1 on error
1565 */
1566static int
1567rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1568 struct buffer_page *tail_page,
1569 struct buffer_page *next_page)
1570{
1571 struct buffer_page *new_head;
1572 int entries;
1573 int type;
1574 int ret;
1575
1576 entries = rb_page_entries(next_page);
1577
1578 /*
1579 * The hard part is here. We need to move the head
1580 * forward, and protect against both readers on
1581 * other CPUs and writers coming in via interrupts.
1582 */
1583 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1584 RB_PAGE_HEAD);
1585
1586 /*
1587 * type can be one of four:
1588 * NORMAL - an interrupt already moved it for us
1589 * HEAD - we are the first to get here.
1590 * UPDATE - we are the interrupt interrupting
1591 * a current move.
1592 * MOVED - a reader on another CPU moved the next
1593 * pointer to its reader page. Give up
1594 * and try again.
1595 */
1596
1597 switch (type) {
1598 case RB_PAGE_HEAD:
1599 /*
1600 * We changed the head to UPDATE, thus
1601 * it is our responsibility to update
1602 * the counters.
1603 */
1604 local_add(entries, &cpu_buffer->overrun);
1605
1606 /*
1607 * The entries will be zeroed out when we move the
1608 * tail page.
1609 */
1610
1611 /* still more to do */
1612 break;
1613
1614 case RB_PAGE_UPDATE:
1615 /*
 1616 * This is an interrupt that interrupted the
1617 * previous update. Still more to do.
1618 */
1619 break;
1620 case RB_PAGE_NORMAL:
1621 /*
1622 * An interrupt came in before the update
1623 * and processed this for us.
1624 * Nothing left to do.
1625 */
1626 return 1;
1627 case RB_PAGE_MOVED:
1628 /*
1629 * The reader is on another CPU and just did
1630 * a swap with our next_page.
1631 * Try again.
1632 */
1633 return 1;
1634 default:
1635 RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
1636 return -1;
1637 }
1638
1639 /*
1640 * Now that we are here, the old head pointer is
1641 * set to UPDATE. This will keep the reader from
1642 * swapping the head page with the reader page.
1643 * The reader (on another CPU) will spin till
1644 * we are finished.
1645 *
1646 * We just need to protect against interrupts
1647 * doing the job. We will set the next pointer
1648 * to HEAD. After that, we set the old pointer
 1649 * to NORMAL, but only if it was HEAD before;
 1650 * otherwise we are an interrupt, and only
 1651 * want the outermost commit to reset it.
1652 */
1653 new_head = next_page;
1654 rb_inc_page(cpu_buffer, &new_head);
1655
1656 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1657 RB_PAGE_NORMAL);
1658
1659 /*
1660 * Valid returns are:
1661 * HEAD - an interrupt came in and already set it.
1662 * NORMAL - One of two things:
1663 * 1) We really set it.
1664 * 2) A bunch of interrupts came in and moved
1665 * the page forward again.
1666 */
1667 switch (ret) {
1668 case RB_PAGE_HEAD:
1669 case RB_PAGE_NORMAL:
1670 /* OK */
1671 break;
1672 default:
1673 RB_WARN_ON(cpu_buffer, 1);
1674 return -1;
1675 }
1676
1677 /*
1678 * It is possible that an interrupt came in,
1679 * set the head up, then more interrupts came in
1680 * and moved it again. When we get back here,
1681 * the page would have been set to NORMAL but we
1682 * just set it back to HEAD.
1683 *
1684 * How do you detect this? Well, if that happened
1685 * the tail page would have moved.
1686 */
1687 if (ret == RB_PAGE_NORMAL) {
1688 /*
 1689 * If the tail had moved past next, then we need
1690 * to reset the pointer.
1691 */
1692 if (cpu_buffer->tail_page != tail_page &&
1693 cpu_buffer->tail_page != next_page)
1694 rb_head_page_set_normal(cpu_buffer, new_head,
1695 next_page,
1696 RB_PAGE_HEAD);
1697 }
1698
1699 /*
 1700 * If this was the outermost commit (the one that
1701 * changed the original pointer from HEAD to UPDATE),
1702 * then it is up to us to reset it to NORMAL.
1703 */
1704 if (type == RB_PAGE_HEAD) {
1705 ret = rb_head_page_set_normal(cpu_buffer, next_page,
1706 tail_page,
1707 RB_PAGE_UPDATE);
1708 if (RB_WARN_ON(cpu_buffer,
1709 ret != RB_PAGE_UPDATE))
1710 return -1;
1711 }
1712
1713 return 0;
1714}
1715
1124static unsigned rb_calculate_event_length(unsigned length) 1716static unsigned rb_calculate_event_length(unsigned length)
1125{ 1717{
1126 struct ring_buffer_event event; /* Used only for sizeof array */ 1718 struct ring_buffer_event event; /* Used only for sizeof array */
@@ -1184,9 +1776,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1184 event->type_len = RINGBUF_TYPE_PADDING; 1776 event->type_len = RINGBUF_TYPE_PADDING;
1185 /* time delta must be non zero */ 1777 /* time delta must be non zero */
1186 event->time_delta = 1; 1778 event->time_delta = 1;
1187 /* Account for this as an entry */
1188 local_inc(&tail_page->entries);
1189 local_inc(&cpu_buffer->entries);
1190 1779
1191 /* Set write to end of buffer */ 1780 /* Set write to end of buffer */
1192 length = (tail + length) - BUF_PAGE_SIZE; 1781 length = (tail + length) - BUF_PAGE_SIZE;
@@ -1199,96 +1788,93 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1199 struct buffer_page *commit_page, 1788 struct buffer_page *commit_page,
1200 struct buffer_page *tail_page, u64 *ts) 1789 struct buffer_page *tail_page, u64 *ts)
1201{ 1790{
1202 struct buffer_page *next_page, *head_page, *reader_page;
1203 struct ring_buffer *buffer = cpu_buffer->buffer; 1791 struct ring_buffer *buffer = cpu_buffer->buffer;
1204 bool lock_taken = false; 1792 struct buffer_page *next_page;
1205 unsigned long flags; 1793 int ret;
1206 1794
1207 next_page = tail_page; 1795 next_page = tail_page;
1208 1796
1209 local_irq_save(flags);
1210 /*
1211 * Since the write to the buffer is still not
1212 * fully lockless, we must be careful with NMIs.
1213 * The locks in the writers are taken when a write
1214 * crosses to a new page. The locks protect against
1215 * races with the readers (this will soon be fixed
1216 * with a lockless solution).
1217 *
1218 * Because we can not protect against NMIs, and we
1219 * want to keep traces reentrant, we need to manage
1220 * what happens when we are in an NMI.
1221 *
1222 * NMIs can happen after we take the lock.
1223 * If we are in an NMI, only take the lock
1224 * if it is not already taken. Otherwise
1225 * simply fail.
1226 */
1227 if (unlikely(in_nmi())) {
1228 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1229 cpu_buffer->nmi_dropped++;
1230 goto out_reset;
1231 }
1232 } else
1233 __raw_spin_lock(&cpu_buffer->lock);
1234
1235 lock_taken = true;
1236
1237 rb_inc_page(cpu_buffer, &next_page); 1797 rb_inc_page(cpu_buffer, &next_page);
1238 1798
1239 head_page = cpu_buffer->head_page;
1240 reader_page = cpu_buffer->reader_page;
1241
1242 /* we grabbed the lock before incrementing */
1243 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1244 goto out_reset;
1245
1246 /* 1799 /*
1247 * If for some reason, we had an interrupt storm that made 1800 * If for some reason, we had an interrupt storm that made
1248 * it all the way around the buffer, bail, and warn 1801 * it all the way around the buffer, bail, and warn
1249 * about it. 1802 * about it.
1250 */ 1803 */
1251 if (unlikely(next_page == commit_page)) { 1804 if (unlikely(next_page == commit_page)) {
1252 cpu_buffer->commit_overrun++; 1805 local_inc(&cpu_buffer->commit_overrun);
1253 goto out_reset; 1806 goto out_reset;
1254 } 1807 }
1255 1808
1256 if (next_page == head_page) { 1809 /*
1257 if (!(buffer->flags & RB_FL_OVERWRITE)) 1810 * This is where the fun begins!
1258 goto out_reset; 1811 *
1259 1812 * We are fighting against races between a reader that
1260 /* tail_page has not moved yet? */ 1813 * could be on another CPU trying to swap its reader
1261 if (tail_page == cpu_buffer->tail_page) { 1814 * page with the buffer head.
1262 /* count overflows */ 1815 *
1263 cpu_buffer->overrun += 1816 * We are also fighting against interrupts coming in and
1264 local_read(&head_page->entries); 1817 * moving the head or tail on us as well.
1818 *
1819 * If the next page is the head page then we have filled
1820 * the buffer, unless the commit page is still on the
1821 * reader page.
1822 */
1823 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
1265 1824
1266 rb_inc_page(cpu_buffer, &head_page); 1825 /*
1267 cpu_buffer->head_page = head_page; 1826 * If the commit is not on the reader page, then
1268 cpu_buffer->head_page->read = 0; 1827 * move the header page.
1828 */
1829 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
1830 /*
1831 * If we are not in overwrite mode,
1832 * this is easy, just stop here.
1833 */
1834 if (!(buffer->flags & RB_FL_OVERWRITE))
1835 goto out_reset;
1836
1837 ret = rb_handle_head_page(cpu_buffer,
1838 tail_page,
1839 next_page);
1840 if (ret < 0)
1841 goto out_reset;
1842 if (ret)
1843 goto out_again;
1844 } else {
1845 /*
1846 * We need to be careful here too. The
1847 * commit page could still be on the reader
1848 * page. We could have a small buffer, and
1849 * have filled up the buffer with events
1850 * from interrupts and such, and wrapped.
1851 *
 1852 * Note, if the tail page is also on the
1853 * reader_page, we let it move out.
1854 */
1855 if (unlikely((cpu_buffer->commit_page !=
1856 cpu_buffer->tail_page) &&
1857 (cpu_buffer->commit_page ==
1858 cpu_buffer->reader_page))) {
1859 local_inc(&cpu_buffer->commit_overrun);
1860 goto out_reset;
1861 }
1269 } 1862 }
1270 } 1863 }
1271 1864
1272 /* 1865 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
1273 * If the tail page is still the same as what we think 1866 if (ret) {
1274 * it is, then it is up to us to update the tail 1867 /*
1275 * pointer. 1868 * Nested commits always have zero deltas, so
1276 */ 1869 * just reread the time stamp
1277 if (tail_page == cpu_buffer->tail_page) { 1870 */
1278 local_set(&next_page->write, 0);
1279 local_set(&next_page->entries, 0);
1280 local_set(&next_page->page->commit, 0);
1281 cpu_buffer->tail_page = next_page;
1282
1283 /* reread the time stamp */
1284 *ts = rb_time_stamp(buffer, cpu_buffer->cpu); 1871 *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
1285 cpu_buffer->tail_page->page->time_stamp = *ts; 1872 next_page->page->time_stamp = *ts;
1286 } 1873 }
1287 1874
1288 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1875 out_again:
1289 1876
1290 __raw_spin_unlock(&cpu_buffer->lock); 1877 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1291 local_irq_restore(flags);
1292 1878
1293 /* fail and let the caller try again */ 1879 /* fail and let the caller try again */
1294 return ERR_PTR(-EAGAIN); 1880 return ERR_PTR(-EAGAIN);
@@ -1297,9 +1883,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1297 /* reset write */ 1883 /* reset write */
1298 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1884 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1299 1885
1300 if (likely(lock_taken))
1301 __raw_spin_unlock(&cpu_buffer->lock);
1302 local_irq_restore(flags);
1303 return NULL; 1886 return NULL;
1304} 1887}
1305 1888
@@ -1316,6 +1899,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1316 barrier(); 1899 barrier();
1317 tail_page = cpu_buffer->tail_page; 1900 tail_page = cpu_buffer->tail_page;
1318 write = local_add_return(length, &tail_page->write); 1901 write = local_add_return(length, &tail_page->write);
1902
1903 /* set write to only the index of the write */
1904 write &= RB_WRITE_MASK;
1319 tail = write - length; 1905 tail = write - length;
1320 1906
 1321 /* See if we shot past the end of this buffer page */ 1907
@@ -1360,12 +1946,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1360 bpage = cpu_buffer->tail_page; 1946 bpage = cpu_buffer->tail_page;
1361 1947
1362 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 1948 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
1949 unsigned long write_mask =
1950 local_read(&bpage->write) & ~RB_WRITE_MASK;
1363 /* 1951 /*
1364 * This is on the tail page. It is possible that 1952 * This is on the tail page. It is possible that
1365 * a write could come in and move the tail page 1953 * a write could come in and move the tail page
1366 * and write to the next page. That is fine 1954 * and write to the next page. That is fine
1367 * because we just shorten what is on this page. 1955 * because we just shorten what is on this page.
1368 */ 1956 */
1957 old_index += write_mask;
1958 new_index += write_mask;
1369 index = local_cmpxchg(&bpage->write, old_index, new_index); 1959 index = local_cmpxchg(&bpage->write, old_index, new_index);
1370 if (index == old_index) 1960 if (index == old_index)
1371 return 1; 1961 return 1;
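The rb_try_to_discard change above follows from the write-field split: since the upper bits of ->write now hold the interrupt stamp, the old and new indexes must be widened with those bits before the cmpxchg, or the compare could never match the full field. A single-threaded sketch with the same assumed 20-bit split as before:

#include <stdio.h>

#define WRITE_BITS 20UL
#define WRITE_MASK ((1UL << WRITE_BITS) - 1)

int main(void)
{
        /* stamp of 3 in the upper bits, write index 2048 in the lower bits */
        unsigned long write = (3UL << WRITE_BITS) | 2048;

        unsigned long old_index = 2048;     /* end of the event being discarded */
        unsigned long new_index = 2000;     /* where the event started */

        /* widen both indexes with the current upper bits so the compare
         * can match the full field (the kernel uses local_cmpxchg here) */
        unsigned long upper = write & ~WRITE_MASK;

        old_index += upper;
        new_index += upper;

        if (write == old_index)
                write = new_index;

        printf("index=%lu stamp=%lu\n", write & WRITE_MASK, write >> WRITE_BITS);
        return 0;
}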
@@ -1481,7 +2071,8 @@ static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
1481} 2071}
1482 2072
1483static struct ring_buffer_event * 2073static struct ring_buffer_event *
1484rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 2074rb_reserve_next_event(struct ring_buffer *buffer,
2075 struct ring_buffer_per_cpu *cpu_buffer,
1485 unsigned long length) 2076 unsigned long length)
1486{ 2077{
1487 struct ring_buffer_event *event; 2078 struct ring_buffer_event *event;
@@ -1491,6 +2082,21 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1491 2082
1492 rb_start_commit(cpu_buffer); 2083 rb_start_commit(cpu_buffer);
1493 2084
2085#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2086 /*
 2087 * Due to the ability to swap a cpu buffer out of a buffer,
 2088 * it is possible it was swapped before we committed.
2089 * (committing stops a swap). We check for it here and
2090 * if it happened, we have to fail the write.
2091 */
2092 barrier();
2093 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2094 local_dec(&cpu_buffer->committing);
2095 local_dec(&cpu_buffer->commits);
2096 return NULL;
2097 }
2098#endif
2099
1494 length = rb_calculate_event_length(length); 2100 length = rb_calculate_event_length(length);
1495 again: 2101 again:
1496 /* 2102 /*
@@ -1651,7 +2257,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1651 if (length > BUF_MAX_DATA_SIZE) 2257 if (length > BUF_MAX_DATA_SIZE)
1652 goto out; 2258 goto out;
1653 2259
1654 event = rb_reserve_next_event(cpu_buffer, length); 2260 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1655 if (!event) 2261 if (!event)
1656 goto out; 2262 goto out;
1657 2263
@@ -1674,18 +2280,23 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1674} 2280}
1675EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2281EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1676 2282
1677static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2283static void
2284rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1678 struct ring_buffer_event *event) 2285 struct ring_buffer_event *event)
1679{ 2286{
1680 local_inc(&cpu_buffer->entries);
1681
1682 /* 2287 /*
1683 * The event first in the commit queue updates the 2288 * The event first in the commit queue updates the
1684 * time stamp. 2289 * time stamp.
1685 */ 2290 */
1686 if (rb_event_is_commit(cpu_buffer, event)) 2291 if (rb_event_is_commit(cpu_buffer, event))
1687 cpu_buffer->write_stamp += event->time_delta; 2292 cpu_buffer->write_stamp += event->time_delta;
2293}
1688 2294
2295static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2296 struct ring_buffer_event *event)
2297{
2298 local_inc(&cpu_buffer->entries);
2299 rb_update_write_stamp(cpu_buffer, event);
1689 rb_end_commit(cpu_buffer); 2300 rb_end_commit(cpu_buffer);
1690} 2301}
1691 2302
@@ -1732,32 +2343,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
1732 event->time_delta = 1; 2343 event->time_delta = 1;
1733} 2344}
1734 2345
1735/** 2346/*
1736 * ring_buffer_event_discard - discard any event in the ring buffer 2347 * Decrement the entries to the page that an event is on.
1737 * @event: the event to discard 2348 * The event does not even need to exist, only the pointer
1738 * 2349 * to the page it is on. This may only be called before the commit
1739 * Sometimes a event that is in the ring buffer needs to be ignored. 2350 * takes place.
1740 * This function lets the user discard an event in the ring buffer
1741 * and then that event will not be read later.
1742 *
1743 * Note, it is up to the user to be careful with this, and protect
1744 * against races. If the user discards an event that has been consumed
1745 * it is possible that it could corrupt the ring buffer.
1746 */ 2351 */
1747void ring_buffer_event_discard(struct ring_buffer_event *event) 2352static inline void
2353rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2354 struct ring_buffer_event *event)
1748{ 2355{
1749 rb_event_discard(event); 2356 unsigned long addr = (unsigned long)event;
2357 struct buffer_page *bpage = cpu_buffer->commit_page;
2358 struct buffer_page *start;
2359
2360 addr &= PAGE_MASK;
2361
2362 /* Do the likely case first */
2363 if (likely(bpage->page == (void *)addr)) {
2364 local_dec(&bpage->entries);
2365 return;
2366 }
2367
2368 /*
2369 * Because the commit page may be on the reader page we
2370 * start with the next page and check the end loop there.
2371 */
2372 rb_inc_page(cpu_buffer, &bpage);
2373 start = bpage;
2374 do {
2375 if (bpage->page == (void *)addr) {
2376 local_dec(&bpage->entries);
2377 return;
2378 }
2379 rb_inc_page(cpu_buffer, &bpage);
2380 } while (bpage != start);
2381
2382 /* commit not part of this buffer?? */
2383 RB_WARN_ON(cpu_buffer, 1);
1750} 2384}
1751EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
1752 2385
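rb_decrement_entry only needs the address of the event to find the buffer page it lives on: masking off the in-page offset with PAGE_MASK yields the page address that is compared against bpage->page. A tiny sketch of that address arithmetic, assuming 4K pages for the illustration:

#include <stdint.h>
#include <stdio.h>

#define PG_SIZE 4096UL                          /* assumption: 4K pages */
#define PG_MASK (~(uintptr_t)(PG_SIZE - 1))     /* in the spirit of PAGE_MASK */

int main(void)
{
        /* pretend an event sits 100 bytes into a page-aligned buffer page */
        static unsigned char page[PG_SIZE] __attribute__((aligned(4096)));
        uintptr_t event = (uintptr_t)page + 100;

        /* clearing the offset bits yields the address of the page itself,
         * which is what gets compared against bpage->page */
        printf("%d\n", (void *)(event & PG_MASK) == (void *)page);
        return 0;
}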
1753/** 2386/**
1754 * ring_buffer_commit_discard - discard an event that has not been committed 2387 * ring_buffer_commit_discard - discard an event that has not been committed
1755 * @buffer: the ring buffer 2388 * @buffer: the ring buffer
1756 * @event: non committed event to discard 2389 * @event: non committed event to discard
1757 * 2390 *
1758 * This is similar to ring_buffer_event_discard but must only be 2391 * Sometimes an event that is in the ring buffer needs to be ignored.
1759 * performed on an event that has not been committed yet. The difference 2392 * This function lets the user discard an event in the ring buffer
1760 * is that this will also try to free the event from the ring buffer 2393 * and then that event will not be read later.
2394 *
 2395 * This function only works if it is called before the item has been
2396 * committed. It will try to free the event from the ring buffer
1761 * if another event has not been added behind it. 2397 * if another event has not been added behind it.
1762 * 2398 *
1763 * If another event has been added behind it, it will set the event 2399 * If another event has been added behind it, it will set the event
@@ -1785,14 +2421,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1785 */ 2421 */
1786 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); 2422 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1787 2423
1788 if (!rb_try_to_discard(cpu_buffer, event)) 2424 rb_decrement_entry(cpu_buffer, event);
2425 if (rb_try_to_discard(cpu_buffer, event))
1789 goto out; 2426 goto out;
1790 2427
1791 /* 2428 /*
1792 * The commit is still visible by the reader, so we 2429 * The commit is still visible by the reader, so we
1793 * must increment entries. 2430 * must still update the timestamp.
1794 */ 2431 */
1795 local_inc(&cpu_buffer->entries); 2432 rb_update_write_stamp(cpu_buffer, event);
1796 out: 2433 out:
1797 rb_end_commit(cpu_buffer); 2434 rb_end_commit(cpu_buffer);
1798 2435
@@ -1853,7 +2490,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1853 if (length > BUF_MAX_DATA_SIZE) 2490 if (length > BUF_MAX_DATA_SIZE)
1854 goto out; 2491 goto out;
1855 2492
1856 event = rb_reserve_next_event(cpu_buffer, length); 2493 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1857 if (!event) 2494 if (!event)
1858 goto out; 2495 goto out;
1859 2496
@@ -1874,9 +2511,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write);
1874static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 2511static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1875{ 2512{
1876 struct buffer_page *reader = cpu_buffer->reader_page; 2513 struct buffer_page *reader = cpu_buffer->reader_page;
1877 struct buffer_page *head = cpu_buffer->head_page; 2514 struct buffer_page *head = rb_set_head_page(cpu_buffer);
1878 struct buffer_page *commit = cpu_buffer->commit_page; 2515 struct buffer_page *commit = cpu_buffer->commit_page;
1879 2516
2517 /* In case of error, head will be NULL */
2518 if (unlikely(!head))
2519 return 1;
2520
1880 return reader->read == rb_page_commit(reader) && 2521 return reader->read == rb_page_commit(reader) &&
1881 (commit == reader || 2522 (commit == reader ||
1882 (commit == head && 2523 (commit == head &&
@@ -1967,7 +2608,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1967 return 0; 2608 return 0;
1968 2609
1969 cpu_buffer = buffer->buffers[cpu]; 2610 cpu_buffer = buffer->buffers[cpu];
1970 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) 2611 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
1971 - cpu_buffer->read; 2612 - cpu_buffer->read;
1972 2613
1973 return ret; 2614 return ret;
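With overrun now a local_t, the per-cpu entry count reported here stays a derived value: events ever written, minus events the writer overwrote, minus events already read. A trivial numeric sketch of that bookkeeping with made-up counter values:

#include <stdio.h>

int main(void)
{
        /* made-up per-cpu counters */
        unsigned long entries = 1000;   /* events ever written to this CPU buffer */
        unsigned long overrun = 120;    /* events lost to the writer lapping the reader */
        unsigned long read    = 300;    /* events already consumed */

        /* what the per-cpu entries query reports: events still waiting */
        printf("%lu\n", entries - overrun - read);
        return 0;
}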
@@ -1988,33 +2629,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1988 return 0; 2629 return 0;
1989 2630
1990 cpu_buffer = buffer->buffers[cpu]; 2631 cpu_buffer = buffer->buffers[cpu];
1991 ret = cpu_buffer->overrun; 2632 ret = local_read(&cpu_buffer->overrun);
1992 2633
1993 return ret; 2634 return ret;
1994} 2635}
1995EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 2636EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1996 2637
1997/** 2638/**
1998 * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
1999 * @buffer: The ring buffer
2000 * @cpu: The per CPU buffer to get the number of overruns from
2001 */
2002unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
2003{
2004 struct ring_buffer_per_cpu *cpu_buffer;
2005 unsigned long ret;
2006
2007 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2008 return 0;
2009
2010 cpu_buffer = buffer->buffers[cpu];
2011 ret = cpu_buffer->nmi_dropped;
2012
2013 return ret;
2014}
2015EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
2016
2017/**
2018 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits 2639 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
2019 * @buffer: The ring buffer 2640 * @buffer: The ring buffer
2020 * @cpu: The per CPU buffer to get the number of overruns from 2641 * @cpu: The per CPU buffer to get the number of overruns from
@@ -2029,7 +2650,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
2029 return 0; 2650 return 0;
2030 2651
2031 cpu_buffer = buffer->buffers[cpu]; 2652 cpu_buffer = buffer->buffers[cpu];
2032 ret = cpu_buffer->commit_overrun; 2653 ret = local_read(&cpu_buffer->commit_overrun);
2033 2654
2034 return ret; 2655 return ret;
2035} 2656}
@@ -2052,7 +2673,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2052 for_each_buffer_cpu(buffer, cpu) { 2673 for_each_buffer_cpu(buffer, cpu) {
2053 cpu_buffer = buffer->buffers[cpu]; 2674 cpu_buffer = buffer->buffers[cpu];
2054 entries += (local_read(&cpu_buffer->entries) - 2675 entries += (local_read(&cpu_buffer->entries) -
2055 cpu_buffer->overrun) - cpu_buffer->read; 2676 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2056 } 2677 }
2057 2678
2058 return entries; 2679 return entries;
@@ -2075,7 +2696,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
2075 /* if you care about this being correct, lock the buffer */ 2696 /* if you care about this being correct, lock the buffer */
2076 for_each_buffer_cpu(buffer, cpu) { 2697 for_each_buffer_cpu(buffer, cpu) {
2077 cpu_buffer = buffer->buffers[cpu]; 2698 cpu_buffer = buffer->buffers[cpu];
2078 overruns += cpu_buffer->overrun; 2699 overruns += local_read(&cpu_buffer->overrun);
2079 } 2700 }
2080 2701
2081 return overruns; 2702 return overruns;
@@ -2088,8 +2709,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2088 2709
2089 /* Iterator usage is expected to have record disabled */ 2710 /* Iterator usage is expected to have record disabled */
2090 if (list_empty(&cpu_buffer->reader_page->list)) { 2711 if (list_empty(&cpu_buffer->reader_page->list)) {
2091 iter->head_page = cpu_buffer->head_page; 2712 iter->head_page = rb_set_head_page(cpu_buffer);
2092 iter->head = cpu_buffer->head_page->read; 2713 if (unlikely(!iter->head_page))
2714 return;
2715 iter->head = iter->head_page->read;
2093 } else { 2716 } else {
2094 iter->head_page = cpu_buffer->reader_page; 2717 iter->head_page = cpu_buffer->reader_page;
2095 iter->head = cpu_buffer->reader_page->read; 2718 iter->head = cpu_buffer->reader_page->read;
@@ -2206,6 +2829,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2206 struct buffer_page *reader = NULL; 2829 struct buffer_page *reader = NULL;
2207 unsigned long flags; 2830 unsigned long flags;
2208 int nr_loops = 0; 2831 int nr_loops = 0;
2832 int ret;
2209 2833
2210 local_irq_save(flags); 2834 local_irq_save(flags);
2211 __raw_spin_lock(&cpu_buffer->lock); 2835 __raw_spin_lock(&cpu_buffer->lock);
@@ -2239,30 +2863,56 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2239 goto out; 2863 goto out;
2240 2864
2241 /* 2865 /*
2242 * Splice the empty reader page into the list around the head.
2243 * Reset the reader page to size zero. 2866 * Reset the reader page to size zero.
2244 */ 2867 */
2868 local_set(&cpu_buffer->reader_page->write, 0);
2869 local_set(&cpu_buffer->reader_page->entries, 0);
2870 local_set(&cpu_buffer->reader_page->page->commit, 0);
2245 2871
2246 reader = cpu_buffer->head_page; 2872 spin:
2873 /*
2874 * Splice the empty reader page into the list around the head.
2875 */
2876 reader = rb_set_head_page(cpu_buffer);
2247 cpu_buffer->reader_page->list.next = reader->list.next; 2877 cpu_buffer->reader_page->list.next = reader->list.next;
2248 cpu_buffer->reader_page->list.prev = reader->list.prev; 2878 cpu_buffer->reader_page->list.prev = reader->list.prev;
2249 2879
2250 local_set(&cpu_buffer->reader_page->write, 0); 2880 /*
2251 local_set(&cpu_buffer->reader_page->entries, 0); 2881 * cpu_buffer->pages just needs to point to the buffer, it
2252 local_set(&cpu_buffer->reader_page->page->commit, 0); 2882 * has no specific buffer page to point to. Lets move it out
2883 * of our way so we don't accidently swap it.
2884 */
2885 cpu_buffer->pages = reader->list.prev;
2253 2886
2254 /* Make the reader page now replace the head */ 2887 /* The reader page will be pointing to the new head */
2255 reader->list.prev->next = &cpu_buffer->reader_page->list; 2888 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2256 reader->list.next->prev = &cpu_buffer->reader_page->list;
2257 2889
2258 /* 2890 /*
2259 * If the tail is on the reader, then we must set the head 2891 * Here's the tricky part.
2260 * to the inserted page, otherwise we set it one before. 2892 *
2893 * We need to move the pointer past the header page.
2894 * But we can only do that if a writer is not currently
2895 * moving it. The page before the header page has the
 2896 * flag bit '1' set if it is pointing to the page we want,
 2897 * but if the writer is in the process of moving it
 2898 * then it will be '2' or already moved '0'.
2261 */ 2899 */
2262 cpu_buffer->head_page = cpu_buffer->reader_page;
2263 2900
2264 if (cpu_buffer->commit_page != reader) 2901 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
2265 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2902
2903 /*
2904 * If we did not convert it, then we must try again.
2905 */
2906 if (!ret)
2907 goto spin;
2908
2909 /*
2910 * Yeah! We succeeded in replacing the page.
2911 *
2912 * Now make the new head point back to the reader page.
2913 */
2914 reader->list.next->prev = &cpu_buffer->reader_page->list;
2915 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2266 2916
2267 /* Finally update the reader page to the new head */ 2917 /* Finally update the reader page to the new head */
2268 cpu_buffer->reader_page = reader; 2918 cpu_buffer->reader_page = reader;
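The reader side of the handshake is the spin label above: the reader re-resolves the head page and retries rb_head_page_replace until the cmpxchg catches the link in the plain HEAD state; while a writer holds it in UPDATE the compare fails and the reader simply loops. A compressed, single-threaded sketch of that retry shape, reusing the hypothetical tag values from the earlier sketches (here it succeeds on the first try):

#include <stdint.h>

#define FLAG_MASK 3UL
#define PG_HEAD   1UL

/* succeed only if the link carries exactly the HEAD tag right now */
static int replace_head(uintptr_t *link, uintptr_t old_page, uintptr_t new_page)
{
        uintptr_t expect = (old_page & ~FLAG_MASK) | PG_HEAD;

        return __sync_bool_compare_and_swap(link, expect, new_page);
}

int main(void)
{
        static long head_page, reader_page;
        uintptr_t link = (uintptr_t)&head_page | PG_HEAD;

        /* the real reader re-resolves the head and retries while a writer
         * holds the link in UPDATE; single-threaded, this succeeds at once */
        while (!replace_head(&link, (uintptr_t)&head_page, (uintptr_t)&reader_page))
                ;

        return link != (uintptr_t)&reader_page;
}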
@@ -2291,8 +2941,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2291 2941
2292 event = rb_reader_event(cpu_buffer); 2942 event = rb_reader_event(cpu_buffer);
2293 2943
2294 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX 2944 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
2295 || rb_discarded_event(event))
2296 cpu_buffer->read++; 2945 cpu_buffer->read++;
2297 2946
2298 rb_update_read_stamp(cpu_buffer, event); 2947 rb_update_read_stamp(cpu_buffer, event);
@@ -2346,15 +2995,12 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2346} 2995}
2347 2996
2348static struct ring_buffer_event * 2997static struct ring_buffer_event *
2349rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 2998rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
2350{ 2999{
2351 struct ring_buffer_per_cpu *cpu_buffer;
2352 struct ring_buffer_event *event; 3000 struct ring_buffer_event *event;
2353 struct buffer_page *reader; 3001 struct buffer_page *reader;
2354 int nr_loops = 0; 3002 int nr_loops = 0;
2355 3003
2356 cpu_buffer = buffer->buffers[cpu];
2357
2358 again: 3004 again:
2359 /* 3005 /*
2360 * We repeat when a timestamp is encountered. It is possible 3006 * We repeat when a timestamp is encountered. It is possible
@@ -2383,7 +3029,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2383 * the box. Return the padding, and we will release 3029 * the box. Return the padding, and we will release
2384 * the current locks, and try again. 3030 * the current locks, and try again.
2385 */ 3031 */
2386 rb_advance_reader(cpu_buffer);
2387 return event; 3032 return event;
2388 3033
2389 case RINGBUF_TYPE_TIME_EXTEND: 3034 case RINGBUF_TYPE_TIME_EXTEND:
@@ -2399,7 +3044,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2399 case RINGBUF_TYPE_DATA: 3044 case RINGBUF_TYPE_DATA:
2400 if (ts) { 3045 if (ts) {
2401 *ts = cpu_buffer->read_stamp + event->time_delta; 3046 *ts = cpu_buffer->read_stamp + event->time_delta;
2402 ring_buffer_normalize_time_stamp(buffer, 3047 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
2403 cpu_buffer->cpu, ts); 3048 cpu_buffer->cpu, ts);
2404 } 3049 }
2405 return event; 3050 return event;
@@ -2486,7 +3131,7 @@ static inline int rb_ok_to_lock(void)
2486 * buffer too. A one time deal is all you get from reading 3131 * buffer too. A one time deal is all you get from reading
2487 * the ring buffer from an NMI. 3132 * the ring buffer from an NMI.
2488 */ 3133 */
2489 if (likely(!in_nmi() && !oops_in_progress)) 3134 if (likely(!in_nmi()))
2490 return 1; 3135 return 1;
2491 3136
2492 tracing_off_permanent(); 3137 tracing_off_permanent();
@@ -2518,15 +3163,15 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2518 local_irq_save(flags); 3163 local_irq_save(flags);
2519 if (dolock) 3164 if (dolock)
2520 spin_lock(&cpu_buffer->reader_lock); 3165 spin_lock(&cpu_buffer->reader_lock);
2521 event = rb_buffer_peek(buffer, cpu, ts); 3166 event = rb_buffer_peek(cpu_buffer, ts);
3167 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3168 rb_advance_reader(cpu_buffer);
2522 if (dolock) 3169 if (dolock)
2523 spin_unlock(&cpu_buffer->reader_lock); 3170 spin_unlock(&cpu_buffer->reader_lock);
2524 local_irq_restore(flags); 3171 local_irq_restore(flags);
2525 3172
2526 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3173 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2527 cpu_relax();
2528 goto again; 3174 goto again;
2529 }
2530 3175
2531 return event; 3176 return event;
2532} 3177}
@@ -2551,10 +3196,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2551 event = rb_iter_peek(iter, ts); 3196 event = rb_iter_peek(iter, ts);
2552 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3197 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2553 3198
2554 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3199 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2555 cpu_relax();
2556 goto again; 3200 goto again;
2557 }
2558 3201
2559 return event; 3202 return event;
2560} 3203}
@@ -2589,13 +3232,10 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2589 if (dolock) 3232 if (dolock)
2590 spin_lock(&cpu_buffer->reader_lock); 3233 spin_lock(&cpu_buffer->reader_lock);
2591 3234
2592 event = rb_buffer_peek(buffer, cpu, ts); 3235 event = rb_buffer_peek(cpu_buffer, ts);
2593 if (!event) 3236 if (event)
2594 goto out_unlock; 3237 rb_advance_reader(cpu_buffer);
2595
2596 rb_advance_reader(cpu_buffer);
2597 3238
2598 out_unlock:
2599 if (dolock) 3239 if (dolock)
2600 spin_unlock(&cpu_buffer->reader_lock); 3240 spin_unlock(&cpu_buffer->reader_lock);
2601 local_irq_restore(flags); 3241 local_irq_restore(flags);
@@ -2603,10 +3243,8 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2603 out: 3243 out:
2604 preempt_enable(); 3244 preempt_enable();
2605 3245
2606 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3246 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2607 cpu_relax();
2608 goto again; 3247 goto again;
2609 }
2610 3248
2611 return event; 3249 return event;
2612} 3250}
@@ -2686,21 +3324,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2686 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3324 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2687 unsigned long flags; 3325 unsigned long flags;
2688 3326
2689 again:
2690 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3327 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3328 again:
2691 event = rb_iter_peek(iter, ts); 3329 event = rb_iter_peek(iter, ts);
2692 if (!event) 3330 if (!event)
2693 goto out; 3331 goto out;
2694 3332
3333 if (event->type_len == RINGBUF_TYPE_PADDING)
3334 goto again;
3335
2695 rb_advance_iter(iter); 3336 rb_advance_iter(iter);
2696 out: 3337 out:
2697 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3338 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2698 3339
2699 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2700 cpu_relax();
2701 goto again;
2702 }
2703
2704 return event; 3340 return event;
2705} 3341}
2706EXPORT_SYMBOL_GPL(ring_buffer_read); 3342EXPORT_SYMBOL_GPL(ring_buffer_read);
@@ -2718,8 +3354,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size);
2718static void 3354static void
2719rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 3355rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2720{ 3356{
3357 rb_head_page_deactivate(cpu_buffer);
3358
2721 cpu_buffer->head_page 3359 cpu_buffer->head_page
2722 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 3360 = list_entry(cpu_buffer->pages, struct buffer_page, list);
2723 local_set(&cpu_buffer->head_page->write, 0); 3361 local_set(&cpu_buffer->head_page->write, 0);
2724 local_set(&cpu_buffer->head_page->entries, 0); 3362 local_set(&cpu_buffer->head_page->entries, 0);
2725 local_set(&cpu_buffer->head_page->page->commit, 0); 3363 local_set(&cpu_buffer->head_page->page->commit, 0);
@@ -2735,16 +3373,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2735 local_set(&cpu_buffer->reader_page->page->commit, 0); 3373 local_set(&cpu_buffer->reader_page->page->commit, 0);
2736 cpu_buffer->reader_page->read = 0; 3374 cpu_buffer->reader_page->read = 0;
2737 3375
2738 cpu_buffer->nmi_dropped = 0; 3376 local_set(&cpu_buffer->commit_overrun, 0);
2739 cpu_buffer->commit_overrun = 0; 3377 local_set(&cpu_buffer->overrun, 0);
2740 cpu_buffer->overrun = 0;
2741 cpu_buffer->read = 0;
2742 local_set(&cpu_buffer->entries, 0); 3378 local_set(&cpu_buffer->entries, 0);
2743 local_set(&cpu_buffer->committing, 0); 3379 local_set(&cpu_buffer->committing, 0);
2744 local_set(&cpu_buffer->commits, 0); 3380 local_set(&cpu_buffer->commits, 0);
3381 cpu_buffer->read = 0;
2745 3382
2746 cpu_buffer->write_stamp = 0; 3383 cpu_buffer->write_stamp = 0;
2747 cpu_buffer->read_stamp = 0; 3384 cpu_buffer->read_stamp = 0;
3385
3386 rb_head_page_activate(cpu_buffer);
2748} 3387}
2749 3388
2750/** 3389/**
@@ -2764,12 +3403,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2764 3403
2765 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3404 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2766 3405
3406 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3407 goto out;
3408
2767 __raw_spin_lock(&cpu_buffer->lock); 3409 __raw_spin_lock(&cpu_buffer->lock);
2768 3410
2769 rb_reset_cpu(cpu_buffer); 3411 rb_reset_cpu(cpu_buffer);
2770 3412
2771 __raw_spin_unlock(&cpu_buffer->lock); 3413 __raw_spin_unlock(&cpu_buffer->lock);
2772 3414
3415 out:
2773 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3416 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2774 3417
2775 atomic_dec(&cpu_buffer->record_disabled); 3418 atomic_dec(&cpu_buffer->record_disabled);
@@ -2852,6 +3495,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2852} 3495}
2853EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 3496EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
2854 3497
3498#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2855/** 3499/**
2856 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 3500 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2857 * @buffer_a: One buffer to swap with 3501 * @buffer_a: One buffer to swap with
@@ -2906,20 +3550,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2906 atomic_inc(&cpu_buffer_a->record_disabled); 3550 atomic_inc(&cpu_buffer_a->record_disabled);
2907 atomic_inc(&cpu_buffer_b->record_disabled); 3551 atomic_inc(&cpu_buffer_b->record_disabled);
2908 3552
3553 ret = -EBUSY;
3554 if (local_read(&cpu_buffer_a->committing))
3555 goto out_dec;
3556 if (local_read(&cpu_buffer_b->committing))
3557 goto out_dec;
3558
2909 buffer_a->buffers[cpu] = cpu_buffer_b; 3559 buffer_a->buffers[cpu] = cpu_buffer_b;
2910 buffer_b->buffers[cpu] = cpu_buffer_a; 3560 buffer_b->buffers[cpu] = cpu_buffer_a;
2911 3561
2912 cpu_buffer_b->buffer = buffer_a; 3562 cpu_buffer_b->buffer = buffer_a;
2913 cpu_buffer_a->buffer = buffer_b; 3563 cpu_buffer_a->buffer = buffer_b;
2914 3564
3565 ret = 0;
3566
3567out_dec:
2915 atomic_dec(&cpu_buffer_a->record_disabled); 3568 atomic_dec(&cpu_buffer_a->record_disabled);
2916 atomic_dec(&cpu_buffer_b->record_disabled); 3569 atomic_dec(&cpu_buffer_b->record_disabled);
2917
2918 ret = 0;
2919out: 3570out:
2920 return ret; 3571 return ret;
2921} 3572}
2922EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 3573EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3574#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
2923 3575
2924/** 3576/**
2925 * ring_buffer_alloc_read_page - allocate a page to read from buffer 3577 * ring_buffer_alloc_read_page - allocate a page to read from buffer
@@ -3092,7 +3744,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3092 read = 0; 3744 read = 0;
3093 } else { 3745 } else {
3094 /* update the entry counter */ 3746 /* update the entry counter */
3095 cpu_buffer->read += local_read(&reader->entries); 3747 cpu_buffer->read += rb_page_entries(reader);
3096 3748
3097 /* swap the pages */ 3749 /* swap the pages */
3098 rb_init_page(bpage); 3750 rb_init_page(bpage);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8bc8d8afea6a..45068269ebb1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,14 +43,11 @@
43 43
44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
45 45
46unsigned long __read_mostly tracing_max_latency;
47unsigned long __read_mostly tracing_thresh;
48
49/* 46/*
50 * On boot up, the ring buffer is set to the minimum size, so that 47 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing. 48 * we do not waste memory on systems that are not using tracing.
52 */ 49 */
53static int ring_buffer_expanded; 50int ring_buffer_expanded;
54 51
55/* 52/*
56 * We need to change this state when a selftest is running. 53 * We need to change this state when a selftest is running.
@@ -64,7 +61,7 @@ static bool __read_mostly tracing_selftest_running;
64/* 61/*
65 * If a tracer is running, we do not want to run SELFTEST. 62 * If a tracer is running, we do not want to run SELFTEST.
66 */ 63 */
67static bool __read_mostly tracing_selftest_disabled; 64bool __read_mostly tracing_selftest_disabled;
68 65
69/* For tracers that don't implement custom flags */ 66/* For tracers that don't implement custom flags */
70static struct tracer_opt dummy_tracer_opt[] = { 67static struct tracer_opt dummy_tracer_opt[] = {
@@ -89,7 +86,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
89 */ 86 */
90static int tracing_disabled = 1; 87static int tracing_disabled = 1;
91 88
92static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
93 90
94static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
95{ 92{
@@ -128,13 +125,13 @@ int ftrace_dump_on_oops;
128 125
129static int tracing_set_tracer(const char *buf); 126static int tracing_set_tracer(const char *buf);
130 127
131#define BOOTUP_TRACER_SIZE 100 128#define MAX_TRACER_SIZE 100
132static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; 129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
133static char *default_bootup_tracer; 130static char *default_bootup_tracer;
134 131
135static int __init set_ftrace(char *str) 132static int __init set_ftrace(char *str)
136{ 133{
137 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); 134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
138 default_bootup_tracer = bootup_tracer_buf; 135 default_bootup_tracer = bootup_tracer_buf;
139 /* We are using ftrace early, expand it */ 136 /* We are using ftrace early, expand it */
140 ring_buffer_expanded = 1; 137 ring_buffer_expanded = 1;
@@ -172,10 +169,11 @@ static struct trace_array global_trace;
172 169
173static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 170static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
174 171
175int filter_current_check_discard(struct ftrace_event_call *call, void *rec, 172int filter_current_check_discard(struct ring_buffer *buffer,
173 struct ftrace_event_call *call, void *rec,
176 struct ring_buffer_event *event) 174 struct ring_buffer_event *event)
177{ 175{
178 return filter_check_discard(call, rec, global_trace.buffer, event); 176 return filter_check_discard(call, rec, buffer, event);
179} 177}
180EXPORT_SYMBOL_GPL(filter_current_check_discard); 178EXPORT_SYMBOL_GPL(filter_current_check_discard);
181 179
@@ -244,13 +242,6 @@ static struct tracer *trace_types __read_mostly;
244static struct tracer *current_trace __read_mostly; 242static struct tracer *current_trace __read_mostly;
245 243
246/* 244/*
247 * max_tracer_type_len is used to simplify the allocating of
248 * buffers to read userspace tracer names. We keep track of
249 * the longest tracer name registered.
250 */
251static int max_tracer_type_len;
252
253/*
254 * trace_types_lock is used to protect the trace_types list. 245 * trace_types_lock is used to protect the trace_types list.
255 * This lock is also used to keep user access serialized. 246 * This lock is also used to keep user access serialized.
256 * Accesses from userspace will grab this lock while userspace 247 * Accesses from userspace will grab this lock while userspace
@@ -266,6 +257,9 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
266 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 257 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
267 TRACE_ITER_GRAPH_TIME; 258 TRACE_ITER_GRAPH_TIME;
268 259
260static int trace_stop_count;
261static DEFINE_SPINLOCK(tracing_start_lock);
262
269/** 263/**
270 * trace_wake_up - wake up tasks waiting for trace input 264 * trace_wake_up - wake up tasks waiting for trace input
271 * 265 *
@@ -274,12 +268,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
274 */ 268 */
275void trace_wake_up(void) 269void trace_wake_up(void)
276{ 270{
271 int cpu;
272
273 if (trace_flags & TRACE_ITER_BLOCK)
274 return;
277 /* 275 /*
278 * The runqueue_is_locked() can fail, but this is the best we 276 * The runqueue_is_locked() can fail, but this is the best we
279 * have for now: 277 * have for now:
280 */ 278 */
281 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) 279 cpu = get_cpu();
280 if (!runqueue_is_locked(cpu))
282 wake_up(&trace_wait); 281 wake_up(&trace_wait);
282 put_cpu();
283} 283}
284 284
285static int __init set_buf_size(char *str) 285static int __init set_buf_size(char *str)
@@ -323,49 +323,125 @@ static const char *trace_options[] = {
323 "printk-msg-only", 323 "printk-msg-only",
324 "context-info", 324 "context-info",
325 "latency-format", 325 "latency-format",
326 "global-clock",
327 "sleep-time", 326 "sleep-time",
328 "graph-time", 327 "graph-time",
329 NULL 328 NULL
330}; 329};
331 330
331static struct {
332 u64 (*func)(void);
333 const char *name;
334} trace_clocks[] = {
335 { trace_clock_local, "local" },
336 { trace_clock_global, "global" },
337};
338
339int trace_clock_id;
340
332/* 341/*
333 * ftrace_max_lock is used to protect the swapping of buffers 342 * trace_parser_get_init - gets the buffer for trace parser
334 * when taking a max snapshot. The buffers themselves are
335 * protected by per_cpu spinlocks. But the action of the swap
336 * needs its own lock.
337 *
338 * This is defined as a raw_spinlock_t in order to help
339 * with performance when lockdep debugging is enabled.
340 */ 343 */
341static raw_spinlock_t ftrace_max_lock = 344int trace_parser_get_init(struct trace_parser *parser, int size)
342 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 345{
346 memset(parser, 0, sizeof(*parser));
347
348 parser->buffer = kmalloc(size, GFP_KERNEL);
349 if (!parser->buffer)
350 return 1;
351
352 parser->size = size;
353 return 0;
354}
343 355
344/* 356/*
345 * Copy the new maximum trace into the separate maximum-trace 357 * trace_parser_put - frees the buffer for trace parser
346 * structure. (this way the maximum trace is permanently saved,
347 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
348 */ 358 */
349static void 359void trace_parser_put(struct trace_parser *parser)
350__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
351{ 360{
352 struct trace_array_cpu *data = tr->data[cpu]; 361 kfree(parser->buffer);
362}
353 363
354 max_tr.cpu = cpu; 364/*
355 max_tr.time_start = data->preempt_timestamp; 365 * trace_get_user - reads the user input string separated by space
366 * (matched by isspace(ch))
367 *
368 * For each string found the 'struct trace_parser' is updated,
369 * and the function returns.
370 *
371 * Returns number of bytes read.
372 *
373 * See kernel/trace/trace.h for 'struct trace_parser' details.
374 */
375int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
376 size_t cnt, loff_t *ppos)
377{
378 char ch;
379 size_t read = 0;
380 ssize_t ret;
356 381
357 data = max_tr.data[cpu]; 382 if (!*ppos)
358 data->saved_latency = tracing_max_latency; 383 trace_parser_clear(parser);
359 384
360 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 385 ret = get_user(ch, ubuf++);
361 data->pid = tsk->pid; 386 if (ret)
362 data->uid = task_uid(tsk); 387 goto out;
363 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
364 data->policy = tsk->policy;
365 data->rt_priority = tsk->rt_priority;
366 388
367 /* record this tasks comm */ 389 read++;
368 tracing_record_cmdline(tsk); 390 cnt--;
391
392 /*
393 * The parser is not finished with the last write,
394 * continue reading the user input without skipping spaces.
395 */
396 if (!parser->cont) {
397 /* skip white space */
398 while (cnt && isspace(ch)) {
399 ret = get_user(ch, ubuf++);
400 if (ret)
401 goto out;
402 read++;
403 cnt--;
404 }
405
406 /* only spaces were written */
407 if (isspace(ch)) {
408 *ppos += read;
409 ret = read;
410 goto out;
411 }
412
413 parser->idx = 0;
414 }
415
416 /* read the non-space input */
417 while (cnt && !isspace(ch)) {
418 if (parser->idx < parser->size - 1)
419 parser->buffer[parser->idx++] = ch;
420 else {
421 ret = -EINVAL;
422 goto out;
423 }
424 ret = get_user(ch, ubuf++);
425 if (ret)
426 goto out;
427 read++;
428 cnt--;
429 }
430
431 /* We either got finished input or we have to wait for another call. */
432 if (isspace(ch)) {
433 parser->buffer[parser->idx] = 0;
434 parser->cont = false;
435 } else {
436 parser->cont = true;
437 parser->buffer[parser->idx++] = ch;
438 }
439
440 *ppos += read;
441 ret = read;
442
443out:
444 return ret;
369} 445}
370 446
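
Note: trace_get_user() above tokenizes space-separated user input one write() at a time, carrying a partially read token across calls via parser->cont. The standalone sketch below mirrors that state machine against an in-memory string instead of get_user(); the names are illustrative, not the kernel interface.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

struct parser {
    char buf[32];
    size_t idx;
    bool cont;      /* the current token continues into the next chunk */
};

/*
 * Consume one chunk of input. Returns true each time a complete,
 * space-terminated token has been assembled in p->buf.
 */
static bool feed(struct parser *p, const char *chunk)
{
    size_t i = 0;
    bool done = false;

    if (!p->cont) {
        while (chunk[i] && isspace((unsigned char)chunk[i]))
            i++;                        /* skip leading white space */
        p->idx = 0;
    }

    while (chunk[i] && !isspace((unsigned char)chunk[i]) &&
           p->idx < sizeof(p->buf) - 1)
        p->buf[p->idx++] = chunk[i++];

    if (chunk[i] == '\0') {
        p->cont = true;                 /* wait for the next chunk */
    } else {
        p->buf[p->idx] = '\0';
        p->cont = false;
        done = true;
    }
    return done;
}

int main(void)
{
    struct parser p = { .cont = false };

    /* "sched_swi" arrives first, the rest of the token in a later write. */
    if (feed(&p, "  sched_swi"))
        printf("token: %s\n", p.buf);
    if (feed(&p, "tch "))
        printf("token: %s\n", p.buf);   /* prints "sched_switch" */
    return 0;
}
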
371ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 447ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
@@ -411,6 +487,56 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
411 return cnt; 487 return cnt;
412} 488}
413 489
490/*
491 * ftrace_max_lock is used to protect the swapping of buffers
492 * when taking a max snapshot. The buffers themselves are
493 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock.
495 *
496 * This is defined as a raw_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled.
498 *
499 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE.
502 */
503static raw_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
505
506#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency;
508unsigned long __read_mostly tracing_thresh;
509
510/*
511 * Copy the new maximum trace into the separate maximum-trace
512 * structure. (this way the maximum trace is permanently saved,
513 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
514 */
515static void
516__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
517{
518 struct trace_array_cpu *data = tr->data[cpu];
519 struct trace_array_cpu *max_data = tr->data[cpu];
520
521 max_tr.cpu = cpu;
522 max_tr.time_start = data->preempt_timestamp;
523
524 max_data = max_tr.data[cpu];
525 max_data->saved_latency = tracing_max_latency;
526 max_data->critical_start = data->critical_start;
527 max_data->critical_end = data->critical_end;
528
529 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
530 max_data->pid = tsk->pid;
531 max_data->uid = task_uid(tsk);
532 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
533 max_data->policy = tsk->policy;
534 max_data->rt_priority = tsk->rt_priority;
535
536 /* record this tasks comm */
 536	/* record this task's comm */
537 tracing_record_cmdline(tsk);
538}
539
414/** 540/**
415 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 541 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
416 * @tr: tracer 542 * @tr: tracer
@@ -425,16 +551,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
425{ 551{
426 struct ring_buffer *buf = tr->buffer; 552 struct ring_buffer *buf = tr->buffer;
427 553
554 if (trace_stop_count)
555 return;
556
428 WARN_ON_ONCE(!irqs_disabled()); 557 WARN_ON_ONCE(!irqs_disabled());
429 __raw_spin_lock(&ftrace_max_lock); 558 __raw_spin_lock(&ftrace_max_lock);
430 559
431 tr->buffer = max_tr.buffer; 560 tr->buffer = max_tr.buffer;
432 max_tr.buffer = buf; 561 max_tr.buffer = buf;
433 562
434 ftrace_disable_cpu();
435 ring_buffer_reset(tr->buffer);
436 ftrace_enable_cpu();
437
438 __update_max_tr(tr, tsk, cpu); 563 __update_max_tr(tr, tsk, cpu);
439 __raw_spin_unlock(&ftrace_max_lock); 564 __raw_spin_unlock(&ftrace_max_lock);
440} 565}
@@ -452,21 +577,35 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
452{ 577{
453 int ret; 578 int ret;
454 579
580 if (trace_stop_count)
581 return;
582
455 WARN_ON_ONCE(!irqs_disabled()); 583 WARN_ON_ONCE(!irqs_disabled());
456 __raw_spin_lock(&ftrace_max_lock); 584 __raw_spin_lock(&ftrace_max_lock);
457 585
458 ftrace_disable_cpu(); 586 ftrace_disable_cpu();
459 587
460 ring_buffer_reset(max_tr.buffer);
461 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 588 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
462 589
590 if (ret == -EBUSY) {
591 /*
592 * We failed to swap the buffer due to a commit taking
593 * place on this CPU. We fail to record, but we reset
594 * the max trace buffer (no one writes directly to it)
595 * and flag that it failed.
596 */
597 trace_array_printk(&max_tr, _THIS_IP_,
598 "Failed to swap buffers due to commit in progress\n");
599 }
600
463 ftrace_enable_cpu(); 601 ftrace_enable_cpu();
464 602
465 WARN_ON_ONCE(ret && ret != -EAGAIN); 603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
466 604
467 __update_max_tr(tr, tsk, cpu); 605 __update_max_tr(tr, tsk, cpu);
468 __raw_spin_unlock(&ftrace_max_lock); 606 __raw_spin_unlock(&ftrace_max_lock);
469} 607}
608#endif /* CONFIG_TRACER_MAX_TRACE */
470 609
471/** 610/**
472 * register_tracer - register a tracer with the ftrace system. 611 * register_tracer - register a tracer with the ftrace system.
@@ -479,7 +618,6 @@ __releases(kernel_lock)
479__acquires(kernel_lock) 618__acquires(kernel_lock)
480{ 619{
481 struct tracer *t; 620 struct tracer *t;
482 int len;
483 int ret = 0; 621 int ret = 0;
484 622
485 if (!type->name) { 623 if (!type->name) {
@@ -487,6 +625,11 @@ __acquires(kernel_lock)
487 return -1; 625 return -1;
488 } 626 }
489 627
628 if (strlen(type->name) > MAX_TRACER_SIZE) {
629 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
630 return -1;
631 }
632
490 /* 633 /*
491 * When this gets called we hold the BKL which means that 634 * When this gets called we hold the BKL which means that
492 * preemption is disabled. Various trace selftests however 635 * preemption is disabled. Various trace selftests however
@@ -501,7 +644,7 @@ __acquires(kernel_lock)
501 for (t = trace_types; t; t = t->next) { 644 for (t = trace_types; t; t = t->next) {
502 if (strcmp(type->name, t->name) == 0) { 645 if (strcmp(type->name, t->name) == 0) {
503 /* already found */ 646 /* already found */
504 pr_info("Trace %s already registered\n", 647 pr_info("Tracer %s already registered\n",
505 type->name); 648 type->name);
506 ret = -1; 649 ret = -1;
507 goto out; 650 goto out;
@@ -523,7 +666,6 @@ __acquires(kernel_lock)
523 if (type->selftest && !tracing_selftest_disabled) { 666 if (type->selftest && !tracing_selftest_disabled) {
524 struct tracer *saved_tracer = current_trace; 667 struct tracer *saved_tracer = current_trace;
525 struct trace_array *tr = &global_trace; 668 struct trace_array *tr = &global_trace;
526 int i;
527 669
528 /* 670 /*
529 * Run a selftest on this tracer. 671 * Run a selftest on this tracer.
@@ -532,8 +674,7 @@ __acquires(kernel_lock)
532 * internal tracing to verify that everything is in order. 674 * internal tracing to verify that everything is in order.
533 * If we fail, we do not register this tracer. 675 * If we fail, we do not register this tracer.
534 */ 676 */
535 for_each_tracing_cpu(i) 677 tracing_reset_online_cpus(tr);
536 tracing_reset(tr, i);
537 678
538 current_trace = type; 679 current_trace = type;
539 /* the test is responsible for initializing and enabling */ 680 /* the test is responsible for initializing and enabling */
@@ -546,8 +687,7 @@ __acquires(kernel_lock)
546 goto out; 687 goto out;
547 } 688 }
548 /* Only reset on passing, to avoid touching corrupted buffers */ 689 /* Only reset on passing, to avoid touching corrupted buffers */
549 for_each_tracing_cpu(i) 690 tracing_reset_online_cpus(tr);
550 tracing_reset(tr, i);
551 691
552 printk(KERN_CONT "PASSED\n"); 692 printk(KERN_CONT "PASSED\n");
553 } 693 }
@@ -555,9 +695,6 @@ __acquires(kernel_lock)
555 695
556 type->next = trace_types; 696 type->next = trace_types;
557 trace_types = type; 697 trace_types = type;
558 len = strlen(type->name);
559 if (len > max_tracer_type_len)
560 max_tracer_type_len = len;
561 698
562 out: 699 out:
563 tracing_selftest_running = false; 700 tracing_selftest_running = false;
@@ -566,7 +703,7 @@ __acquires(kernel_lock)
566 if (ret || !default_bootup_tracer) 703 if (ret || !default_bootup_tracer)
567 goto out_unlock; 704 goto out_unlock;
568 705
569 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) 706 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
570 goto out_unlock; 707 goto out_unlock;
571 708
572 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 709 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -588,14 +725,13 @@ __acquires(kernel_lock)
588void unregister_tracer(struct tracer *type) 725void unregister_tracer(struct tracer *type)
589{ 726{
590 struct tracer **t; 727 struct tracer **t;
591 int len;
592 728
593 mutex_lock(&trace_types_lock); 729 mutex_lock(&trace_types_lock);
594 for (t = &trace_types; *t; t = &(*t)->next) { 730 for (t = &trace_types; *t; t = &(*t)->next) {
595 if (*t == type) 731 if (*t == type)
596 goto found; 732 goto found;
597 } 733 }
598 pr_info("Trace %s not registered\n", type->name); 734 pr_info("Tracer %s not registered\n", type->name);
599 goto out; 735 goto out;
600 736
601 found: 737 found:
@@ -608,35 +744,46 @@ void unregister_tracer(struct tracer *type)
608 current_trace->stop(&global_trace); 744 current_trace->stop(&global_trace);
609 current_trace = &nop_trace; 745 current_trace = &nop_trace;
610 } 746 }
611 747out:
612 if (strlen(type->name) != max_tracer_type_len)
613 goto out;
614
615 max_tracer_type_len = 0;
616 for (t = &trace_types; *t; t = &(*t)->next) {
617 len = strlen((*t)->name);
618 if (len > max_tracer_type_len)
619 max_tracer_type_len = len;
620 }
621 out:
622 mutex_unlock(&trace_types_lock); 748 mutex_unlock(&trace_types_lock);
623} 749}
624 750
625void tracing_reset(struct trace_array *tr, int cpu) 751static void __tracing_reset(struct trace_array *tr, int cpu)
626{ 752{
627 ftrace_disable_cpu(); 753 ftrace_disable_cpu();
628 ring_buffer_reset_cpu(tr->buffer, cpu); 754 ring_buffer_reset_cpu(tr->buffer, cpu);
629 ftrace_enable_cpu(); 755 ftrace_enable_cpu();
630} 756}
631 757
758void tracing_reset(struct trace_array *tr, int cpu)
759{
760 struct ring_buffer *buffer = tr->buffer;
761
762 ring_buffer_record_disable(buffer);
763
764 /* Make sure all commits have finished */
765 synchronize_sched();
766 __tracing_reset(tr, cpu);
767
768 ring_buffer_record_enable(buffer);
769}
770
632void tracing_reset_online_cpus(struct trace_array *tr) 771void tracing_reset_online_cpus(struct trace_array *tr)
633{ 772{
773 struct ring_buffer *buffer = tr->buffer;
634 int cpu; 774 int cpu;
635 775
776 ring_buffer_record_disable(buffer);
777
778 /* Make sure all commits have finished */
779 synchronize_sched();
780
636 tr->time_start = ftrace_now(tr->cpu); 781 tr->time_start = ftrace_now(tr->cpu);
637 782
638 for_each_online_cpu(cpu) 783 for_each_online_cpu(cpu)
639 tracing_reset(tr, cpu); 784 __tracing_reset(tr, cpu);
785
786 ring_buffer_record_enable(buffer);
640} 787}
641 788
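
Note: the rewritten tracing_reset() above stops new writers, waits for every commit already in flight to finish, and only then clears the per-CPU buffer. A compressed userspace sketch of that ordering, with a spin-wait standing in for synchronize_sched() (all names here are stand-ins):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int record_disabled;      /* writers would check this before reserving */
static atomic_int commits_in_flight;    /* writers still between reserve and commit */
static int entries;                     /* stand-in for the per-CPU buffer contents */

/* Stand-in for synchronize_sched(): the kernel waits for an RCU-sched
 * grace period; here we just spin until in-flight commits drain. */
static void wait_for_commits(void)
{
    while (atomic_load(&commits_in_flight))
        ;
}

static void tracing_reset_sketch(void)
{
    atomic_store(&record_disabled, 1);  /* ring_buffer_record_disable() */
    wait_for_commits();                 /* synchronize_sched() */
    entries = 0;                        /* __tracing_reset() */
    atomic_store(&record_disabled, 0);  /* ring_buffer_record_enable() */
}

int main(void)
{
    entries = 7;
    tracing_reset_sketch();
    printf("entries after reset: %d\n", entries);
    return 0;
}
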
642void tracing_reset_current(int cpu) 789void tracing_reset_current(int cpu)
@@ -667,8 +814,10 @@ static void trace_init_cmdlines(void)
667 cmdline_idx = 0; 814 cmdline_idx = 0;
668} 815}
669 816
670static int trace_stop_count; 817int is_tracing_stopped(void)
671static DEFINE_SPINLOCK(tracing_start_lock); 818{
819 return trace_stop_count;
820}
672 821
673/** 822/**
674 * ftrace_off_permanent - disable all ftrace code permanently 823 * ftrace_off_permanent - disable all ftrace code permanently
@@ -837,7 +986,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
837 986
838 entry->preempt_count = pc & 0xff; 987 entry->preempt_count = pc & 0xff;
839 entry->pid = (tsk) ? tsk->pid : 0; 988 entry->pid = (tsk) ? tsk->pid : 0;
840 entry->tgid = (tsk) ? tsk->tgid : 0; 989 entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
841 entry->flags = 990 entry->flags =
842#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 991#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
843 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 992 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -848,15 +997,17 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
848 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 997 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
849 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 998 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
850} 999}
1000EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
851 1001
852struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 1002struct ring_buffer_event *
853 int type, 1003trace_buffer_lock_reserve(struct ring_buffer *buffer,
854 unsigned long len, 1004 int type,
855 unsigned long flags, int pc) 1005 unsigned long len,
1006 unsigned long flags, int pc)
856{ 1007{
857 struct ring_buffer_event *event; 1008 struct ring_buffer_event *event;
858 1009
859 event = ring_buffer_lock_reserve(tr->buffer, len); 1010 event = ring_buffer_lock_reserve(buffer, len);
860 if (event != NULL) { 1011 if (event != NULL) {
861 struct trace_entry *ent = ring_buffer_event_data(event); 1012 struct trace_entry *ent = ring_buffer_event_data(event);
862 1013
@@ -866,58 +1017,60 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
866 1017
867 return event; 1018 return event;
868} 1019}
869static void ftrace_trace_stack(struct trace_array *tr,
870 unsigned long flags, int skip, int pc);
871static void ftrace_trace_userstack(struct trace_array *tr,
872 unsigned long flags, int pc);
873 1020
874static inline void __trace_buffer_unlock_commit(struct trace_array *tr, 1021static inline void
875 struct ring_buffer_event *event, 1022__trace_buffer_unlock_commit(struct ring_buffer *buffer,
876 unsigned long flags, int pc, 1023 struct ring_buffer_event *event,
877 int wake) 1024 unsigned long flags, int pc,
1025 int wake)
878{ 1026{
879 ring_buffer_unlock_commit(tr->buffer, event); 1027 ring_buffer_unlock_commit(buffer, event);
880 1028
881 ftrace_trace_stack(tr, flags, 6, pc); 1029 ftrace_trace_stack(buffer, flags, 6, pc);
882 ftrace_trace_userstack(tr, flags, pc); 1030 ftrace_trace_userstack(buffer, flags, pc);
883 1031
884 if (wake) 1032 if (wake)
885 trace_wake_up(); 1033 trace_wake_up();
886} 1034}
887 1035
888void trace_buffer_unlock_commit(struct trace_array *tr, 1036void trace_buffer_unlock_commit(struct ring_buffer *buffer,
889 struct ring_buffer_event *event, 1037 struct ring_buffer_event *event,
890 unsigned long flags, int pc) 1038 unsigned long flags, int pc)
891{ 1039{
892 __trace_buffer_unlock_commit(tr, event, flags, pc, 1); 1040 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
893} 1041}
894 1042
895struct ring_buffer_event * 1043struct ring_buffer_event *
896trace_current_buffer_lock_reserve(int type, unsigned long len, 1044trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1045 int type, unsigned long len,
897 unsigned long flags, int pc) 1046 unsigned long flags, int pc)
898{ 1047{
899 return trace_buffer_lock_reserve(&global_trace, 1048 *current_rb = global_trace.buffer;
1049 return trace_buffer_lock_reserve(*current_rb,
900 type, len, flags, pc); 1050 type, len, flags, pc);
901} 1051}
902EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); 1052EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
903 1053
904void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 1054void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1055 struct ring_buffer_event *event,
905 unsigned long flags, int pc) 1056 unsigned long flags, int pc)
906{ 1057{
907 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); 1058 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
908} 1059}
909EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); 1060EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
910 1061
911void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, 1062void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
912 unsigned long flags, int pc) 1063 struct ring_buffer_event *event,
1064 unsigned long flags, int pc)
913{ 1065{
914 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); 1066 __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
915} 1067}
916EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 1068EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
917 1069
918void trace_current_buffer_discard_commit(struct ring_buffer_event *event) 1070void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1071 struct ring_buffer_event *event)
919{ 1072{
920 ring_buffer_discard_commit(global_trace.buffer, event); 1073 ring_buffer_discard_commit(buffer, event);
921} 1074}
922EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); 1075EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
923 1076
@@ -927,6 +1080,7 @@ trace_function(struct trace_array *tr,
927 int pc) 1080 int pc)
928{ 1081{
929 struct ftrace_event_call *call = &event_function; 1082 struct ftrace_event_call *call = &event_function;
1083 struct ring_buffer *buffer = tr->buffer;
930 struct ring_buffer_event *event; 1084 struct ring_buffer_event *event;
931 struct ftrace_entry *entry; 1085 struct ftrace_entry *entry;
932 1086
@@ -934,7 +1088,7 @@ trace_function(struct trace_array *tr,
934 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
935 return; 1089 return;
936 1090
937 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), 1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
938 flags, pc); 1092 flags, pc);
939 if (!event) 1093 if (!event)
940 return; 1094 return;
@@ -942,58 +1096,10 @@ trace_function(struct trace_array *tr,
942 entry->ip = ip; 1096 entry->ip = ip;
943 entry->parent_ip = parent_ip; 1097 entry->parent_ip = parent_ip;
944 1098
945 if (!filter_check_discard(call, entry, tr->buffer, event)) 1099 if (!filter_check_discard(call, entry, buffer, event))
946 ring_buffer_unlock_commit(tr->buffer, event); 1100 ring_buffer_unlock_commit(buffer, event);
947} 1101}
948 1102
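
Note: trace_function() above now shows the common event path in its plain form: reserve space in the ring buffer, fill the entry, and commit it only if the event filter does not discard it. The toy sketch below reproduces that shape with an array standing in for the ring buffer (none of these helpers are the kernel's).

#include <stdio.h>

struct event { unsigned long ip, parent_ip; };
static struct event slots[8];
static int next_slot;

static struct event *buffer_lock_reserve(void)
{
    if (next_slot >= 8)
        return NULL;                    /* buffer full: caller simply drops the event */
    return &slots[next_slot];
}

static int filter_check_discard(const struct event *e)
{
    return e->ip == 0;                  /* pretend the filter drops ip == 0 */
}

static void buffer_unlock_commit(void)
{
    next_slot++;                        /* make the reserved slot visible */
}

/* The shape of the function above: reserve, fill, filter, commit. */
static void trace_function_sketch(unsigned long ip, unsigned long parent_ip)
{
    struct event *e = buffer_lock_reserve();

    if (!e)
        return;
    e->ip = ip;
    e->parent_ip = parent_ip;
    if (!filter_check_discard(e))
        buffer_unlock_commit();
}

int main(void)
{
    trace_function_sketch(0x1234, 0x5678);
    trace_function_sketch(0, 0);        /* filtered out, never committed */
    printf("committed events: %d\n", next_slot);
    return 0;
}
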
949#ifdef CONFIG_FUNCTION_GRAPH_TRACER
950static int __trace_graph_entry(struct trace_array *tr,
951 struct ftrace_graph_ent *trace,
952 unsigned long flags,
953 int pc)
954{
955 struct ftrace_event_call *call = &event_funcgraph_entry;
956 struct ring_buffer_event *event;
957 struct ftrace_graph_ent_entry *entry;
958
959 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
960 return 0;
961
962 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
963 sizeof(*entry), flags, pc);
964 if (!event)
965 return 0;
966 entry = ring_buffer_event_data(event);
967 entry->graph_ent = *trace;
968 if (!filter_current_check_discard(call, entry, event))
969 ring_buffer_unlock_commit(global_trace.buffer, event);
970
971 return 1;
972}
973
974static void __trace_graph_return(struct trace_array *tr,
975 struct ftrace_graph_ret *trace,
976 unsigned long flags,
977 int pc)
978{
979 struct ftrace_event_call *call = &event_funcgraph_exit;
980 struct ring_buffer_event *event;
981 struct ftrace_graph_ret_entry *entry;
982
983 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
984 return;
985
986 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
987 sizeof(*entry), flags, pc);
988 if (!event)
989 return;
990 entry = ring_buffer_event_data(event);
991 entry->ret = *trace;
992 if (!filter_current_check_discard(call, entry, event))
993 ring_buffer_unlock_commit(global_trace.buffer, event);
994}
995#endif
996
997void 1103void
998ftrace(struct trace_array *tr, struct trace_array_cpu *data, 1104ftrace(struct trace_array *tr, struct trace_array_cpu *data,
999 unsigned long ip, unsigned long parent_ip, unsigned long flags, 1105 unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -1003,17 +1109,17 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1003 trace_function(tr, ip, parent_ip, flags, pc); 1109 trace_function(tr, ip, parent_ip, flags, pc);
1004} 1110}
1005 1111
1006static void __ftrace_trace_stack(struct trace_array *tr, 1112#ifdef CONFIG_STACKTRACE
1113static void __ftrace_trace_stack(struct ring_buffer *buffer,
1007 unsigned long flags, 1114 unsigned long flags,
1008 int skip, int pc) 1115 int skip, int pc)
1009{ 1116{
1010#ifdef CONFIG_STACKTRACE
1011 struct ftrace_event_call *call = &event_kernel_stack; 1117 struct ftrace_event_call *call = &event_kernel_stack;
1012 struct ring_buffer_event *event; 1118 struct ring_buffer_event *event;
1013 struct stack_entry *entry; 1119 struct stack_entry *entry;
1014 struct stack_trace trace; 1120 struct stack_trace trace;
1015 1121
1016 event = trace_buffer_lock_reserve(tr, TRACE_STACK, 1122 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1017 sizeof(*entry), flags, pc); 1123 sizeof(*entry), flags, pc);
1018 if (!event) 1124 if (!event)
1019 return; 1125 return;
@@ -1026,32 +1132,28 @@ static void __ftrace_trace_stack(struct trace_array *tr,
1026 trace.entries = entry->caller; 1132 trace.entries = entry->caller;
1027 1133
1028 save_stack_trace(&trace); 1134 save_stack_trace(&trace);
1029 if (!filter_check_discard(call, entry, tr->buffer, event)) 1135 if (!filter_check_discard(call, entry, buffer, event))
1030 ring_buffer_unlock_commit(tr->buffer, event); 1136 ring_buffer_unlock_commit(buffer, event);
1031#endif
1032} 1137}
1033 1138
1034static void ftrace_trace_stack(struct trace_array *tr, 1139void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1035 unsigned long flags, 1140 int skip, int pc)
1036 int skip, int pc)
1037{ 1141{
1038 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1142 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1039 return; 1143 return;
1040 1144
1041 __ftrace_trace_stack(tr, flags, skip, pc); 1145 __ftrace_trace_stack(buffer, flags, skip, pc);
1042} 1146}
1043 1147
1044void __trace_stack(struct trace_array *tr, 1148void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1045 unsigned long flags, 1149 int pc)
1046 int skip, int pc)
1047{ 1150{
1048 __ftrace_trace_stack(tr, flags, skip, pc); 1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1049} 1152}
1050 1153
1051static void ftrace_trace_userstack(struct trace_array *tr, 1154void
1052 unsigned long flags, int pc) 1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1053{ 1156{
1054#ifdef CONFIG_STACKTRACE
1055 struct ftrace_event_call *call = &event_user_stack; 1157 struct ftrace_event_call *call = &event_user_stack;
1056 struct ring_buffer_event *event; 1158 struct ring_buffer_event *event;
1057 struct userstack_entry *entry; 1159 struct userstack_entry *entry;
@@ -1060,12 +1162,13 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1060 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1162 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1061 return; 1163 return;
1062 1164
1063 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, 1165 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1064 sizeof(*entry), flags, pc); 1166 sizeof(*entry), flags, pc);
1065 if (!event) 1167 if (!event)
1066 return; 1168 return;
1067 entry = ring_buffer_event_data(event); 1169 entry = ring_buffer_event_data(event);
1068 1170
1171 entry->tgid = current->tgid;
1069 memset(&entry->caller, 0, sizeof(entry->caller)); 1172 memset(&entry->caller, 0, sizeof(entry->caller));
1070 1173
1071 trace.nr_entries = 0; 1174 trace.nr_entries = 0;
@@ -1074,9 +1177,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1074 trace.entries = entry->caller; 1177 trace.entries = entry->caller;
1075 1178
1076 save_stack_trace_user(&trace); 1179 save_stack_trace_user(&trace);
1077 if (!filter_check_discard(call, entry, tr->buffer, event)) 1180 if (!filter_check_discard(call, entry, buffer, event))
1078 ring_buffer_unlock_commit(tr->buffer, event); 1181 ring_buffer_unlock_commit(buffer, event);
1079#endif
1080} 1182}
1081 1183
1082#ifdef UNUSED 1184#ifdef UNUSED
@@ -1086,16 +1188,20 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1086} 1188}
1087#endif /* UNUSED */ 1189#endif /* UNUSED */
1088 1190
1191#endif /* CONFIG_STACKTRACE */
1192
1089static void 1193static void
1090ftrace_trace_special(void *__tr, 1194ftrace_trace_special(void *__tr,
1091 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1195 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1092 int pc) 1196 int pc)
1093{ 1197{
1198 struct ftrace_event_call *call = &event_special;
1094 struct ring_buffer_event *event; 1199 struct ring_buffer_event *event;
1095 struct trace_array *tr = __tr; 1200 struct trace_array *tr = __tr;
1201 struct ring_buffer *buffer = tr->buffer;
1096 struct special_entry *entry; 1202 struct special_entry *entry;
1097 1203
1098 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, 1204 event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1099 sizeof(*entry), 0, pc); 1205 sizeof(*entry), 0, pc);
1100 if (!event) 1206 if (!event)
1101 return; 1207 return;
@@ -1103,7 +1209,9 @@ ftrace_trace_special(void *__tr,
1103 entry->arg1 = arg1; 1209 entry->arg1 = arg1;
1104 entry->arg2 = arg2; 1210 entry->arg2 = arg2;
1105 entry->arg3 = arg3; 1211 entry->arg3 = arg3;
1106 trace_buffer_unlock_commit(tr, event, 0, pc); 1212
1213 if (!filter_check_discard(call, entry, buffer, event))
1214 trace_buffer_unlock_commit(buffer, event, 0, pc);
1107} 1215}
1108 1216
1109void 1217void
@@ -1114,62 +1222,6 @@ __trace_special(void *__tr, void *__data,
1114} 1222}
1115 1223
1116void 1224void
1117tracing_sched_switch_trace(struct trace_array *tr,
1118 struct task_struct *prev,
1119 struct task_struct *next,
1120 unsigned long flags, int pc)
1121{
1122 struct ftrace_event_call *call = &event_context_switch;
1123 struct ring_buffer_event *event;
1124 struct ctx_switch_entry *entry;
1125
1126 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1127 sizeof(*entry), flags, pc);
1128 if (!event)
1129 return;
1130 entry = ring_buffer_event_data(event);
1131 entry->prev_pid = prev->pid;
1132 entry->prev_prio = prev->prio;
1133 entry->prev_state = prev->state;
1134 entry->next_pid = next->pid;
1135 entry->next_prio = next->prio;
1136 entry->next_state = next->state;
1137 entry->next_cpu = task_cpu(next);
1138
1139 if (!filter_check_discard(call, entry, tr->buffer, event))
1140 trace_buffer_unlock_commit(tr, event, flags, pc);
1141}
1142
1143void
1144tracing_sched_wakeup_trace(struct trace_array *tr,
1145 struct task_struct *wakee,
1146 struct task_struct *curr,
1147 unsigned long flags, int pc)
1148{
1149 struct ftrace_event_call *call = &event_wakeup;
1150 struct ring_buffer_event *event;
1151 struct ctx_switch_entry *entry;
1152
1153 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1154 sizeof(*entry), flags, pc);
1155 if (!event)
1156 return;
1157 entry = ring_buffer_event_data(event);
1158 entry->prev_pid = curr->pid;
1159 entry->prev_prio = curr->prio;
1160 entry->prev_state = curr->state;
1161 entry->next_pid = wakee->pid;
1162 entry->next_prio = wakee->prio;
1163 entry->next_state = wakee->state;
1164 entry->next_cpu = task_cpu(wakee);
1165
1166 if (!filter_check_discard(call, entry, tr->buffer, event))
1167 ring_buffer_unlock_commit(tr->buffer, event);
1168 ftrace_trace_stack(tr, flags, 6, pc);
1169 ftrace_trace_userstack(tr, flags, pc);
1170}
1171
1172void
1173ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) 1225ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1174{ 1226{
1175 struct trace_array *tr = &global_trace; 1227 struct trace_array *tr = &global_trace;
@@ -1193,68 +1245,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1193 local_irq_restore(flags); 1245 local_irq_restore(flags);
1194} 1246}
1195 1247
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197int trace_graph_entry(struct ftrace_graph_ent *trace)
1198{
1199 struct trace_array *tr = &global_trace;
1200 struct trace_array_cpu *data;
1201 unsigned long flags;
1202 long disabled;
1203 int ret;
1204 int cpu;
1205 int pc;
1206
1207 if (!ftrace_trace_task(current))
1208 return 0;
1209
1210 if (!ftrace_graph_addr(trace->func))
1211 return 0;
1212
1213 local_irq_save(flags);
1214 cpu = raw_smp_processor_id();
1215 data = tr->data[cpu];
1216 disabled = atomic_inc_return(&data->disabled);
1217 if (likely(disabled == 1)) {
1218 pc = preempt_count();
1219 ret = __trace_graph_entry(tr, trace, flags, pc);
1220 } else {
1221 ret = 0;
1222 }
1223 /* Only do the atomic if it is not already set */
1224 if (!test_tsk_trace_graph(current))
1225 set_tsk_trace_graph(current);
1226
1227 atomic_dec(&data->disabled);
1228 local_irq_restore(flags);
1229
1230 return ret;
1231}
1232
1233void trace_graph_return(struct ftrace_graph_ret *trace)
1234{
1235 struct trace_array *tr = &global_trace;
1236 struct trace_array_cpu *data;
1237 unsigned long flags;
1238 long disabled;
1239 int cpu;
1240 int pc;
1241
1242 local_irq_save(flags);
1243 cpu = raw_smp_processor_id();
1244 data = tr->data[cpu];
1245 disabled = atomic_inc_return(&data->disabled);
1246 if (likely(disabled == 1)) {
1247 pc = preempt_count();
1248 __trace_graph_return(tr, trace, flags, pc);
1249 }
1250 if (!trace->depth)
1251 clear_tsk_trace_graph(current);
1252 atomic_dec(&data->disabled);
1253 local_irq_restore(flags);
1254}
1255#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1256
1257
1258/** 1248/**
1259 * trace_vbprintk - write binary msg to tracing buffer 1249 * trace_vbprintk - write binary msg to tracing buffer
1260 * 1250 *
@@ -1267,6 +1257,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1267 1257
1268 struct ftrace_event_call *call = &event_bprint; 1258 struct ftrace_event_call *call = &event_bprint;
1269 struct ring_buffer_event *event; 1259 struct ring_buffer_event *event;
1260 struct ring_buffer *buffer;
1270 struct trace_array *tr = &global_trace; 1261 struct trace_array *tr = &global_trace;
1271 struct trace_array_cpu *data; 1262 struct trace_array_cpu *data;
1272 struct bprint_entry *entry; 1263 struct bprint_entry *entry;
@@ -1299,7 +1290,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1299 goto out_unlock; 1290 goto out_unlock;
1300 1291
1301 size = sizeof(*entry) + sizeof(u32) * len; 1292 size = sizeof(*entry) + sizeof(u32) * len;
1302 event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); 1293 buffer = tr->buffer;
1294 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1295 flags, pc);
1303 if (!event) 1296 if (!event)
1304 goto out_unlock; 1297 goto out_unlock;
1305 entry = ring_buffer_event_data(event); 1298 entry = ring_buffer_event_data(event);
@@ -1307,8 +1300,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1307 entry->fmt = fmt; 1300 entry->fmt = fmt;
1308 1301
1309 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1302 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1310 if (!filter_check_discard(call, entry, tr->buffer, event)) 1303 if (!filter_check_discard(call, entry, buffer, event))
1311 ring_buffer_unlock_commit(tr->buffer, event); 1304 ring_buffer_unlock_commit(buffer, event);
1312 1305
1313out_unlock: 1306out_unlock:
1314 __raw_spin_unlock(&trace_buf_lock); 1307 __raw_spin_unlock(&trace_buf_lock);
@@ -1323,14 +1316,30 @@ out:
1323} 1316}
1324EXPORT_SYMBOL_GPL(trace_vbprintk); 1317EXPORT_SYMBOL_GPL(trace_vbprintk);
1325 1318
1326int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 1319int trace_array_printk(struct trace_array *tr,
1320 unsigned long ip, const char *fmt, ...)
1321{
1322 int ret;
1323 va_list ap;
1324
1325 if (!(trace_flags & TRACE_ITER_PRINTK))
1326 return 0;
1327
1328 va_start(ap, fmt);
1329 ret = trace_array_vprintk(tr, ip, fmt, ap);
1330 va_end(ap);
1331 return ret;
1332}
1333
1334int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args)
1327{ 1336{
1328 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1329 static char trace_buf[TRACE_BUF_SIZE]; 1338 static char trace_buf[TRACE_BUF_SIZE];
1330 1339
1331 struct ftrace_event_call *call = &event_print; 1340 struct ftrace_event_call *call = &event_print;
1332 struct ring_buffer_event *event; 1341 struct ring_buffer_event *event;
1333 struct trace_array *tr = &global_trace; 1342 struct ring_buffer *buffer;
1334 struct trace_array_cpu *data; 1343 struct trace_array_cpu *data;
1335 int cpu, len = 0, size, pc; 1344 int cpu, len = 0, size, pc;
1336 struct print_entry *entry; 1345 struct print_entry *entry;
@@ -1358,7 +1367,9 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1358 trace_buf[len] = 0; 1367 trace_buf[len] = 0;
1359 1368
1360 size = sizeof(*entry) + len + 1; 1369 size = sizeof(*entry) + len + 1;
1361 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); 1370 buffer = tr->buffer;
1371 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1372 irq_flags, pc);
1362 if (!event) 1373 if (!event)
1363 goto out_unlock; 1374 goto out_unlock;
1364 entry = ring_buffer_event_data(event); 1375 entry = ring_buffer_event_data(event);
@@ -1366,8 +1377,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1366 1377
1367 memcpy(&entry->buf, trace_buf, len); 1378 memcpy(&entry->buf, trace_buf, len);
1368 entry->buf[len] = 0; 1379 entry->buf[len] = 0;
1369 if (!filter_check_discard(call, entry, tr->buffer, event)) 1380 if (!filter_check_discard(call, entry, buffer, event))
1370 ring_buffer_unlock_commit(tr->buffer, event); 1381 ring_buffer_unlock_commit(buffer, event);
1371 1382
1372 out_unlock: 1383 out_unlock:
1373 __raw_spin_unlock(&trace_buf_lock); 1384 __raw_spin_unlock(&trace_buf_lock);
@@ -1379,6 +1390,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1379 1390
1380 return len; 1391 return len;
1381} 1392}
1393
1394int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1395{
1396 return trace_array_printk(&global_trace, ip, fmt, args);
1397}
1382EXPORT_SYMBOL_GPL(trace_vprintk); 1398EXPORT_SYMBOL_GPL(trace_vprintk);
1383 1399
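
Note: trace_array_printk() above is a thin varargs front end over trace_array_vprintk(): start a va_list, hand it down, end it. The standalone sketch below shows the same forwarding shape with ordinary printf-family calls (log_printk/log_vprintk are made-up names for illustration).

#include <stdarg.h>
#include <stdio.h>

/* vprintf-style core: takes an already-started va_list. */
static int log_vprintk(unsigned long ip, const char *fmt, va_list args)
{
    int len = printf("[%#lx] ", ip);
    return len + vprintf(fmt, args);
}

/* printf-style wrapper: start/end the va_list and forward it. */
static int log_printk(unsigned long ip, const char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);
    ret = log_vprintk(ip, fmt, ap);
    va_end(ap);
    return ret;
}

int main(void)
{
    log_printk(0xc0ffee, "hello %s, %d\n", "tracer", 42);
    return 0;
}
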
1384enum trace_file_type { 1400enum trace_file_type {
@@ -1518,6 +1534,37 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1518 return ent; 1534 return ent;
1519} 1535}
1520 1536
1537static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1538{
1539 struct trace_array *tr = iter->tr;
1540 struct ring_buffer_event *event;
1541 struct ring_buffer_iter *buf_iter;
1542 unsigned long entries = 0;
1543 u64 ts;
1544
1545 tr->data[cpu]->skipped_entries = 0;
1546
1547 if (!iter->buffer_iter[cpu])
1548 return;
1549
1550 buf_iter = iter->buffer_iter[cpu];
1551 ring_buffer_iter_reset(buf_iter);
1552
1553 /*
1554 * We could have the case with the max latency tracers
1555 * that a reset never took place on a cpu. This is evident
1556 * by the timestamp being before the start of the buffer.
1557 */
1558 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1559 if (ts >= iter->tr->time_start)
1560 break;
1561 entries++;
1562 ring_buffer_read(buf_iter, NULL);
1563 }
1564
1565 tr->data[cpu]->skipped_entries = entries;
1566}
1567
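
Note: tracing_iter_reset() above walks the iterator past any events stamped before tr->time_start, counting them as skipped so later statistics can ignore them. A minimal sketch of the same skip-and-count walk over a plain array (illustrative types only):

#include <stdio.h>

struct event { unsigned long long ts; int payload; };

/* Advance *idx past events older than time_start, return how many were skipped. */
static unsigned long skip_stale(const struct event *ev, int nr_events,
                                int *idx, unsigned long long time_start)
{
    unsigned long skipped = 0;

    while (*idx < nr_events && ev[*idx].ts < time_start) {
        (*idx)++;
        skipped++;
    }
    return skipped;
}

int main(void)
{
    struct event ev[] = { { 10, 1 }, { 20, 2 }, { 35, 3 }, { 40, 4 } };
    int idx = 0;
    unsigned long skipped = skip_stale(ev, 4, &idx, 30ULL);

    printf("skipped %lu, first kept ts=%llu\n", skipped, ev[idx].ts);
    return 0;
}
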
1521/* 1568/*
1522 * No necessary locking here. The worst thing which can 1569 * No necessary locking here. The worst thing which can
1521/* 1568/*

@@ -1556,10 +1603,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1556 1603
1557 if (cpu_file == TRACE_PIPE_ALL_CPU) { 1604 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1558 for_each_tracing_cpu(cpu) 1605 for_each_tracing_cpu(cpu)
1559 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1606 tracing_iter_reset(iter, cpu);
1560 } else 1607 } else
1561 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); 1608 tracing_iter_reset(iter, cpu_file);
1562
1563 1609
1564 ftrace_enable_cpu(); 1610 ftrace_enable_cpu();
1565 1611
@@ -1588,10 +1634,10 @@ static void print_lat_help_header(struct seq_file *m)
1588 seq_puts(m, "# | / _----=> need-resched \n"); 1634 seq_puts(m, "# | / _----=> need-resched \n");
1589 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1635 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1590 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1636 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1591 seq_puts(m, "# |||| / \n"); 1637 seq_puts(m, "# |||| /_--=> lock-depth \n");
1592 seq_puts(m, "# ||||| delay \n"); 1638 seq_puts(m, "# |||||/ delay \n");
1593 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1639 seq_puts(m, "# cmd pid |||||| time | caller \n");
1594 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1640 seq_puts(m, "# \\ / |||||| \\ | / \n");
1595} 1641}
1596 1642
1597static void print_func_help_header(struct seq_file *m) 1643static void print_func_help_header(struct seq_file *m)
@@ -1608,16 +1654,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1608 struct trace_array *tr = iter->tr; 1654 struct trace_array *tr = iter->tr;
1609 struct trace_array_cpu *data = tr->data[tr->cpu]; 1655 struct trace_array_cpu *data = tr->data[tr->cpu];
1610 struct tracer *type = current_trace; 1656 struct tracer *type = current_trace;
1611 unsigned long total; 1657 unsigned long entries = 0;
1612 unsigned long entries; 1658 unsigned long total = 0;
1659 unsigned long count;
1613 const char *name = "preemption"; 1660 const char *name = "preemption";
1661 int cpu;
1614 1662
1615 if (type) 1663 if (type)
1616 name = type->name; 1664 name = type->name;
1617 1665
1618 entries = ring_buffer_entries(iter->tr->buffer); 1666
1619 total = entries + 1667 for_each_tracing_cpu(cpu) {
1620 ring_buffer_overruns(iter->tr->buffer); 1668 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1669 /*
1670 * If this buffer has skipped entries, then we hold all
1671 * entries for the trace and we need to ignore the
1672 * ones before the time stamp.
1673 */
1674 if (tr->data[cpu]->skipped_entries) {
1675 count -= tr->data[cpu]->skipped_entries;
1676 /* total is the same as the entries */
1677 total += count;
1678 } else
1679 total += count +
1680 ring_buffer_overrun_cpu(tr->buffer, cpu);
1681 entries += count;
1682 }
1621 1683
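
Note: the per-CPU loop above replaces the single ring_buffer_entries()/ring_buffer_overruns() totals: a CPU whose buffer carries skipped (pre-reset) entries contributes only its kept count, while other CPUs also add their overrun count to the total. The arithmetic in isolation, with made-up sample values:

#include <stdio.h>

struct cpu_stats {
    unsigned long count;    /* entries currently in this CPU's buffer */
    unsigned long skipped;  /* entries older than the trace start */
    unsigned long overrun;  /* entries lost to buffer overwrite */
};

int main(void)
{
    struct cpu_stats cpus[] = {
        { .count = 100, .skipped = 20, .overrun = 5 },  /* skipped: ignore overrun */
        { .count = 80,  .skipped = 0,  .overrun = 7 },
    };
    unsigned long entries = 0, total = 0;

    for (int i = 0; i < 2; i++) {
        unsigned long count = cpus[i].count;

        if (cpus[i].skipped) {
            count -= cpus[i].skipped;
            total += count;             /* total equals the kept entries */
        } else {
            total += count + cpus[i].overrun;
        }
        entries += count;
    }
    printf("entries=%lu total=%lu\n", entries, total);  /* entries=160 total=167 */
    return 0;
}
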
1622 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1684 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1623 name, UTS_RELEASE); 1685 name, UTS_RELEASE);
@@ -1659,7 +1721,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1659 seq_puts(m, "\n# => ended at: "); 1721 seq_puts(m, "\n# => ended at: ");
1660 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 1722 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1661 trace_print_seq(m, &iter->seq); 1723 trace_print_seq(m, &iter->seq);
1662 seq_puts(m, "#\n"); 1724 seq_puts(m, "\n#\n");
1663 } 1725 }
1664 1726
1665 seq_puts(m, "#\n"); 1727 seq_puts(m, "#\n");
@@ -1678,6 +1740,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1678 if (cpumask_test_cpu(iter->cpu, iter->started)) 1740 if (cpumask_test_cpu(iter->cpu, iter->started))
1679 return; 1741 return;
1680 1742
1743 if (iter->tr->data[iter->cpu]->skipped_entries)
1744 return;
1745
1681 cpumask_set_cpu(iter->cpu, iter->started); 1746 cpumask_set_cpu(iter->cpu, iter->started);
1682 1747
1683 /* Don't print started cpu buffer for the first entry of the trace */ 1748 /* Don't print started cpu buffer for the first entry of the trace */
@@ -1884,7 +1949,7 @@ static int s_show(struct seq_file *m, void *v)
1884 return 0; 1949 return 0;
1885} 1950}
1886 1951
1887static struct seq_operations tracer_seq_ops = { 1952static const struct seq_operations tracer_seq_ops = {
1888 .start = s_start, 1953 .start = s_start,
1889 .next = s_next, 1954 .next = s_next,
1890 .stop = s_stop, 1955 .stop = s_stop,
@@ -1919,11 +1984,9 @@ __tracing_open(struct inode *inode, struct file *file)
1919 if (current_trace) 1984 if (current_trace)
1920 *iter->trace = *current_trace; 1985 *iter->trace = *current_trace;
1921 1986
1922 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) 1987 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
1923 goto fail; 1988 goto fail;
1924 1989
1925 cpumask_clear(iter->started);
1926
1927 if (current_trace && current_trace->print_max) 1990 if (current_trace && current_trace->print_max)
1928 iter->tr = &max_tr; 1991 iter->tr = &max_tr;
1929 else 1992 else
@@ -1940,19 +2003,23 @@ __tracing_open(struct inode *inode, struct file *file)
1940 if (ring_buffer_overruns(iter->tr->buffer)) 2003 if (ring_buffer_overruns(iter->tr->buffer))
1941 iter->iter_flags |= TRACE_FILE_ANNOTATE; 2004 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1942 2005
2006 /* stop the trace while dumping */
2007 tracing_stop();
2008
1943 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 2009 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1944 for_each_tracing_cpu(cpu) { 2010 for_each_tracing_cpu(cpu) {
1945 2011
1946 iter->buffer_iter[cpu] = 2012 iter->buffer_iter[cpu] =
1947 ring_buffer_read_start(iter->tr->buffer, cpu); 2013 ring_buffer_read_start(iter->tr->buffer, cpu);
2014 tracing_iter_reset(iter, cpu);
1948 } 2015 }
1949 } else { 2016 } else {
1950 cpu = iter->cpu_file; 2017 cpu = iter->cpu_file;
1951 iter->buffer_iter[cpu] = 2018 iter->buffer_iter[cpu] =
1952 ring_buffer_read_start(iter->tr->buffer, cpu); 2019 ring_buffer_read_start(iter->tr->buffer, cpu);
2020 tracing_iter_reset(iter, cpu);
1953 } 2021 }
1954 2022
1955 /* TODO stop tracer */
1956 ret = seq_open(file, &tracer_seq_ops); 2023 ret = seq_open(file, &tracer_seq_ops);
1957 if (ret < 0) { 2024 if (ret < 0) {
1958 fail_ret = ERR_PTR(ret); 2025 fail_ret = ERR_PTR(ret);
@@ -1962,9 +2029,6 @@ __tracing_open(struct inode *inode, struct file *file)
1962 m = file->private_data; 2029 m = file->private_data;
1963 m->private = iter; 2030 m->private = iter;
1964 2031
1965 /* stop the trace while dumping */
1966 tracing_stop();
1967
1968 mutex_unlock(&trace_types_lock); 2032 mutex_unlock(&trace_types_lock);
1969 2033
1970 return iter; 2034 return iter;
@@ -1975,6 +2039,7 @@ __tracing_open(struct inode *inode, struct file *file)
1975 ring_buffer_read_finish(iter->buffer_iter[cpu]); 2039 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1976 } 2040 }
1977 free_cpumask_var(iter->started); 2041 free_cpumask_var(iter->started);
2042 tracing_start();
1978 fail: 2043 fail:
1979 mutex_unlock(&trace_types_lock); 2044 mutex_unlock(&trace_types_lock);
1980 kfree(iter->trace); 2045 kfree(iter->trace);
@@ -2031,7 +2096,7 @@ static int tracing_open(struct inode *inode, struct file *file)
2031 2096
2032 /* If this file was open for write, then erase contents */ 2097 /* If this file was open for write, then erase contents */
2033 if ((file->f_mode & FMODE_WRITE) && 2098 if ((file->f_mode & FMODE_WRITE) &&
2034 !(file->f_flags & O_APPEND)) { 2099 (file->f_flags & O_TRUNC)) {
2035 long cpu = (long) inode->i_private; 2100 long cpu = (long) inode->i_private;
2036 2101
2037 if (cpu == TRACE_PIPE_ALL_CPU) 2102 if (cpu == TRACE_PIPE_ALL_CPU)
@@ -2096,7 +2161,7 @@ static int t_show(struct seq_file *m, void *v)
2096 return 0; 2161 return 0;
2097} 2162}
2098 2163
2099static struct seq_operations show_traces_seq_ops = { 2164static const struct seq_operations show_traces_seq_ops = {
2100 .start = t_start, 2165 .start = t_start,
2101 .next = t_next, 2166 .next = t_next,
2102 .stop = t_stop, 2167 .stop = t_stop,
@@ -2256,8 +2321,8 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2256 len += 3; /* "no" and newline */ 2321 len += 3; /* "no" and newline */
2257 } 2322 }
2258 2323
2259 /* +2 for \n and \0 */ 2324 /* +1 for \0 */
2260 buf = kmalloc(len + 2, GFP_KERNEL); 2325 buf = kmalloc(len + 1, GFP_KERNEL);
2261 if (!buf) { 2326 if (!buf) {
2262 mutex_unlock(&trace_types_lock); 2327 mutex_unlock(&trace_types_lock);
2263 return -ENOMEM; 2328 return -ENOMEM;
@@ -2280,7 +2345,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2280 } 2345 }
2281 mutex_unlock(&trace_types_lock); 2346 mutex_unlock(&trace_types_lock);
2282 2347
2283 WARN_ON(r >= len + 2); 2348 WARN_ON(r >= len + 1);
2284 2349
2285 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2350 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2286 2351
@@ -2291,23 +2356,23 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2291/* Try to assign a tracer specific option */ 2356/* Try to assign a tracer specific option */
2292static int set_tracer_option(struct tracer *trace, char *cmp, int neg) 2357static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2293{ 2358{
2294 struct tracer_flags *trace_flags = trace->flags; 2359 struct tracer_flags *tracer_flags = trace->flags;
2295 struct tracer_opt *opts = NULL; 2360 struct tracer_opt *opts = NULL;
2296 int ret = 0, i = 0; 2361 int ret = 0, i = 0;
2297 int len; 2362 int len;
2298 2363
2299 for (i = 0; trace_flags->opts[i].name; i++) { 2364 for (i = 0; tracer_flags->opts[i].name; i++) {
2300 opts = &trace_flags->opts[i]; 2365 opts = &tracer_flags->opts[i];
2301 len = strlen(opts->name); 2366 len = strlen(opts->name);
2302 2367
2303 if (strncmp(cmp, opts->name, len) == 0) { 2368 if (strncmp(cmp, opts->name, len) == 0) {
2304 ret = trace->set_flag(trace_flags->val, 2369 ret = trace->set_flag(tracer_flags->val,
2305 opts->bit, !neg); 2370 opts->bit, !neg);
2306 break; 2371 break;
2307 } 2372 }
2308 } 2373 }
2309 /* Not found */ 2374 /* Not found */
2310 if (!trace_flags->opts[i].name) 2375 if (!tracer_flags->opts[i].name)
2311 return -EINVAL; 2376 return -EINVAL;
2312 2377
2313 /* Refused to handle */ 2378 /* Refused to handle */
@@ -2315,9 +2380,9 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2315 return ret; 2380 return ret;
2316 2381
2317 if (neg) 2382 if (neg)
2318 trace_flags->val &= ~opts->bit; 2383 tracer_flags->val &= ~opts->bit;
2319 else 2384 else
2320 trace_flags->val |= opts->bit; 2385 tracer_flags->val |= opts->bit;
2321 2386
2322 return 0; 2387 return 0;
2323} 2388}
@@ -2332,22 +2397,6 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2332 trace_flags |= mask; 2397 trace_flags |= mask;
2333 else 2398 else
2334 trace_flags &= ~mask; 2399 trace_flags &= ~mask;
2335
2336 if (mask == TRACE_ITER_GLOBAL_CLK) {
2337 u64 (*func)(void);
2338
2339 if (enabled)
2340 func = trace_clock_global;
2341 else
2342 func = trace_clock_local;
2343
2344 mutex_lock(&trace_types_lock);
2345 ring_buffer_set_clock(global_trace.buffer, func);
2346
2347 if (max_tr.buffer)
2348 ring_buffer_set_clock(max_tr.buffer, func);
2349 mutex_unlock(&trace_types_lock);
2350 }
2351} 2400}
2352 2401
2353static ssize_t 2402static ssize_t
@@ -2542,7 +2591,7 @@ static ssize_t
2542tracing_set_trace_read(struct file *filp, char __user *ubuf, 2591tracing_set_trace_read(struct file *filp, char __user *ubuf,
2543 size_t cnt, loff_t *ppos) 2592 size_t cnt, loff_t *ppos)
2544{ 2593{
2545 char buf[max_tracer_type_len+2]; 2594 char buf[MAX_TRACER_SIZE+2];
2546 int r; 2595 int r;
2547 2596
2548 mutex_lock(&trace_types_lock); 2597 mutex_lock(&trace_types_lock);
@@ -2692,15 +2741,15 @@ static ssize_t
2692tracing_set_trace_write(struct file *filp, const char __user *ubuf, 2741tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2693 size_t cnt, loff_t *ppos) 2742 size_t cnt, loff_t *ppos)
2694{ 2743{
2695 char buf[max_tracer_type_len+1]; 2744 char buf[MAX_TRACER_SIZE+1];
2696 int i; 2745 int i;
2697 size_t ret; 2746 size_t ret;
2698 int err; 2747 int err;
2699 2748
2700 ret = cnt; 2749 ret = cnt;
2701 2750
2702 if (cnt > max_tracer_type_len) 2751 if (cnt > MAX_TRACER_SIZE)
2703 cnt = max_tracer_type_len; 2752 cnt = MAX_TRACER_SIZE;
2704 2753
2705 if (copy_from_user(&buf, ubuf, cnt)) 2754 if (copy_from_user(&buf, ubuf, cnt))
2706 return -EFAULT; 2755 return -EFAULT;
@@ -3085,7 +3134,8 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3085 break; 3134 break;
3086 } 3135 }
3087 3136
3088 trace_consume(iter); 3137 if (ret != TRACE_TYPE_NO_CONSUME)
3138 trace_consume(iter);
3089 rem -= count; 3139 rem -= count;
 3090 if (!find_next_entry_inc(iter)) { 3140 if (!find_next_entry_inc(iter)) {
 3091 rem = 0; 3141 rem = 0;
@@ -3314,6 +3364,62 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3314 return cnt; 3364 return cnt;
3315} 3365}
3316 3366
3367static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
3368 size_t cnt, loff_t *ppos)
3369{
3370 char buf[64];
3371 int bufiter = 0;
3372 int i;
3373
3374 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3375 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
3376 "%s%s%s%s", i ? " " : "",
3377 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3378 i == trace_clock_id ? "]" : "");
3379 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
3380
3381 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
3382}
3383
3384static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3385 size_t cnt, loff_t *fpos)
3386{
3387 char buf[64];
3388 const char *clockstr;
3389 int i;
3390
3391 if (cnt >= sizeof(buf))
3392 return -EINVAL;
3393
3394 if (copy_from_user(&buf, ubuf, cnt))
3395 return -EFAULT;
3396
3397 buf[cnt] = 0;
3398
3399 clockstr = strstrip(buf);
3400
3401 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3402 if (strcmp(trace_clocks[i].name, clockstr) == 0)
3403 break;
3404 }
3405 if (i == ARRAY_SIZE(trace_clocks))
3406 return -EINVAL;
3407
3408 trace_clock_id = i;
3409
3410 mutex_lock(&trace_types_lock);
3411
3412 ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3413 if (max_tr.buffer)
3414 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3415
3416 mutex_unlock(&trace_types_lock);
3417
3418 *fpos += cnt;
3419
3420 return cnt;
3421}
3422
3317static const struct file_operations tracing_max_lat_fops = { 3423static const struct file_operations tracing_max_lat_fops = {
3318 .open = tracing_open_generic, 3424 .open = tracing_open_generic,
3319 .read = tracing_max_lat_read, 3425 .read = tracing_max_lat_read,
@@ -3351,6 +3457,12 @@ static const struct file_operations tracing_mark_fops = {
3351 .write = tracing_mark_write, 3457 .write = tracing_mark_write,
3352}; 3458};
3353 3459
3460static const struct file_operations trace_clock_fops = {
3461 .open = tracing_open_generic,
3462 .read = tracing_clock_read,
3463 .write = tracing_clock_write,
3464};
3465
3354struct ftrace_buffer_info { 3466struct ftrace_buffer_info {
3355 struct trace_array *tr; 3467 struct trace_array *tr;
3356 void *spare; 3468 void *spare;
@@ -3631,9 +3743,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3631 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); 3743 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3632 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 3744 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3633 3745
3634 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3635 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3636
3637 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 3746 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3638 3747
3639 kfree(s); 3748 kfree(s);
@@ -3894,17 +4003,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3894 if (ret < 0) 4003 if (ret < 0)
3895 return ret; 4004 return ret;
3896 4005
3897 switch (val) { 4006 if (val != 0 && val != 1)
3898 case 0:
3899 trace_flags &= ~(1 << index);
3900 break;
3901 case 1:
3902 trace_flags |= 1 << index;
3903 break;
3904
3905 default:
3906 return -EINVAL; 4007 return -EINVAL;
3907 } 4008 set_tracer_flags(1 << index, val);
3908 4009
3909 *ppos += cnt; 4010 *ppos += cnt;
3910 4011
@@ -4072,11 +4173,13 @@ static __init int tracer_init_debugfs(void)
4072 trace_create_file("current_tracer", 0644, d_tracer, 4173 trace_create_file("current_tracer", 0644, d_tracer,
4073 &global_trace, &set_tracer_fops); 4174 &global_trace, &set_tracer_fops);
4074 4175
4176#ifdef CONFIG_TRACER_MAX_TRACE
4075 trace_create_file("tracing_max_latency", 0644, d_tracer, 4177 trace_create_file("tracing_max_latency", 0644, d_tracer,
4076 &tracing_max_latency, &tracing_max_lat_fops); 4178 &tracing_max_latency, &tracing_max_lat_fops);
4077 4179
4078 trace_create_file("tracing_thresh", 0644, d_tracer, 4180 trace_create_file("tracing_thresh", 0644, d_tracer,
4079 &tracing_thresh, &tracing_max_lat_fops); 4181 &tracing_thresh, &tracing_max_lat_fops);
4182#endif
4080 4183
4081 trace_create_file("README", 0444, d_tracer, 4184 trace_create_file("README", 0444, d_tracer,
4082 NULL, &tracing_readme_fops); 4185 NULL, &tracing_readme_fops);
@@ -4093,6 +4196,9 @@ static __init int tracer_init_debugfs(void)
4093 trace_create_file("saved_cmdlines", 0444, d_tracer, 4196 trace_create_file("saved_cmdlines", 0444, d_tracer,
4094 NULL, &tracing_saved_cmdlines_fops); 4197 NULL, &tracing_saved_cmdlines_fops);
4095 4198
4199 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4200 &trace_clock_fops);
4201
4096#ifdef CONFIG_DYNAMIC_FTRACE 4202#ifdef CONFIG_DYNAMIC_FTRACE
4097 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4203 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4098 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4204 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4233,8 +4339,11 @@ static void __ftrace_dump(bool disable_tracing)
4233 iter.pos = -1; 4339 iter.pos = -1;
4234 4340
4235 if (find_next_entry_inc(&iter) != NULL) { 4341 if (find_next_entry_inc(&iter) != NULL) {
4236 print_trace_line(&iter); 4342 int ret;
4237 trace_consume(&iter); 4343
4344 ret = print_trace_line(&iter);
4345 if (ret != TRACE_TYPE_NO_CONSUME)
4346 trace_consume(&iter);
4238 } 4347 }
4239 4348
4240 trace_printk_seq(&iter.seq); 4349 trace_printk_seq(&iter.seq);
@@ -4268,7 +4377,6 @@ void ftrace_dump(void)
4268 4377
4269__init static int tracer_alloc_buffers(void) 4378__init static int tracer_alloc_buffers(void)
4270{ 4379{
4271 struct trace_array_cpu *data;
4272 int ring_buf_size; 4380 int ring_buf_size;
4273 int i; 4381 int i;
4274 int ret = -ENOMEM; 4382 int ret = -ENOMEM;
@@ -4279,7 +4387,7 @@ __init static int tracer_alloc_buffers(void)
4279 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4280 goto out_free_buffer_mask; 4388 goto out_free_buffer_mask;
4281 4389
4282 if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) 4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4283 goto out_free_tracing_cpumask; 4391 goto out_free_tracing_cpumask;
4284 4392
4285 /* To save memory, keep the ring buffer size to its minimum */ 4393 /* To save memory, keep the ring buffer size to its minimum */
@@ -4290,7 +4398,6 @@ __init static int tracer_alloc_buffers(void)
4290 4398
4291 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 4399 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4292 cpumask_copy(tracing_cpumask, cpu_all_mask); 4400 cpumask_copy(tracing_cpumask, cpu_all_mask);
4293 cpumask_clear(tracing_reader_cpumask);
4294 4401
4295 /* TODO: make the number of buffers hot pluggable with CPUS */ 4402 /* TODO: make the number of buffers hot pluggable with CPUS */
4296 global_trace.buffer = ring_buffer_alloc(ring_buf_size, 4403 global_trace.buffer = ring_buffer_alloc(ring_buf_size,
@@ -4318,7 +4425,7 @@ __init static int tracer_alloc_buffers(void)
4318 4425
4319 /* Allocate the first page for all buffers */ 4426 /* Allocate the first page for all buffers */
4320 for_each_tracing_cpu(i) { 4427 for_each_tracing_cpu(i) {
4321 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4322 max_tr.data[i] = &per_cpu(max_data, i); 4429 max_tr.data[i] = &per_cpu(max_data, i);
4323 } 4430 }
4324 4431
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3548ae5cc780..405cb850b75d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -7,10 +7,10 @@
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h>
10#include <linux/ftrace.h> 11#include <linux/ftrace.h>
11#include <trace/boot.h> 12#include <trace/boot.h>
12#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
13#include <trace/power.h>
14 14
15#include <linux/trace_seq.h> 15#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 16#include <linux/ftrace_event.h>
@@ -34,167 +34,61 @@ enum trace_type {
34 TRACE_GRAPH_ENT, 34 TRACE_GRAPH_ENT,
35 TRACE_USER_STACK, 35 TRACE_USER_STACK,
36 TRACE_HW_BRANCHES, 36 TRACE_HW_BRANCHES,
37 TRACE_SYSCALL_ENTER,
38 TRACE_SYSCALL_EXIT,
39 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
40 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
41 TRACE_POWER,
42 TRACE_BLK, 39 TRACE_BLK,
43 40
44 __TRACE_LAST_TYPE, 41 __TRACE_LAST_TYPE,
45}; 42};
46 43
47/* 44enum kmemtrace_type_id {
48 * Function trace entry - function address and parent function addres: 45 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
49 */ 46 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
50struct ftrace_entry { 47 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
51 struct trace_entry ent;
52 unsigned long ip;
53 unsigned long parent_ip;
54};
55
56/* Function call entry */
57struct ftrace_graph_ent_entry {
58 struct trace_entry ent;
59 struct ftrace_graph_ent graph_ent;
60}; 48};
61 49
62/* Function return entry */
63struct ftrace_graph_ret_entry {
64 struct trace_entry ent;
65 struct ftrace_graph_ret ret;
66};
67extern struct tracer boot_tracer; 50extern struct tracer boot_tracer;
68 51
69/* 52#undef __field
70 * Context switch trace entry - which task (and prio) we switched from/to: 53#define __field(type, item) type item;
71 */
72struct ctx_switch_entry {
73 struct trace_entry ent;
74 unsigned int prev_pid;
75 unsigned char prev_prio;
76 unsigned char prev_state;
77 unsigned int next_pid;
78 unsigned char next_prio;
79 unsigned char next_state;
80 unsigned int next_cpu;
81};
82 54
83/* 55#undef __field_struct
84 * Special (free-form) trace entry: 56#define __field_struct(type, item) __field(type, item)
85 */
86struct special_entry {
87 struct trace_entry ent;
88 unsigned long arg1;
89 unsigned long arg2;
90 unsigned long arg3;
91};
92 57
93/* 58#undef __field_desc
94 * Stack-trace entry: 59#define __field_desc(type, container, item)
95 */
96 60
97#define FTRACE_STACK_ENTRIES 8 61#undef __array
62#define __array(type, item, size) type item[size];
98 63
99struct stack_entry { 64#undef __array_desc
100 struct trace_entry ent; 65#define __array_desc(type, container, item, size)
101 unsigned long caller[FTRACE_STACK_ENTRIES];
102};
103 66
104struct userstack_entry { 67#undef __dynamic_array
105 struct trace_entry ent; 68#define __dynamic_array(type, item) type item[];
106 unsigned long caller[FTRACE_STACK_ENTRIES];
107};
108 69
109/* 70#undef F_STRUCT
110 * trace_printk entry: 71#define F_STRUCT(args...) args
111 */
112struct bprint_entry {
113 struct trace_entry ent;
114 unsigned long ip;
115 const char *fmt;
116 u32 buf[];
117};
118
119struct print_entry {
120 struct trace_entry ent;
121 unsigned long ip;
122 char buf[];
123};
124
125#define TRACE_OLD_SIZE 88
126
127struct trace_field_cont {
128 unsigned char type;
129 /* Temporary till we get rid of this completely */
130 char buf[TRACE_OLD_SIZE - 1];
131};
132
133struct trace_mmiotrace_rw {
134 struct trace_entry ent;
135 struct mmiotrace_rw rw;
136};
137
138struct trace_mmiotrace_map {
139 struct trace_entry ent;
140 struct mmiotrace_map map;
141};
142
143struct trace_boot_call {
144 struct trace_entry ent;
145 struct boot_trace_call boot_call;
146};
147
148struct trace_boot_ret {
149 struct trace_entry ent;
150 struct boot_trace_ret boot_ret;
151};
152
153#define TRACE_FUNC_SIZE 30
154#define TRACE_FILE_SIZE 20
155struct trace_branch {
156 struct trace_entry ent;
157 unsigned line;
158 char func[TRACE_FUNC_SIZE+1];
159 char file[TRACE_FILE_SIZE+1];
160 char correct;
161};
162
163struct hw_branch_entry {
164 struct trace_entry ent;
165 u64 from;
166 u64 to;
167};
168 72
169struct trace_power { 73#undef FTRACE_ENTRY
170 struct trace_entry ent; 74#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
171 struct power_trace state_data; 75 struct struct_name { \
172}; 76 struct trace_entry ent; \
77 tstruct \
78 }
173 79
174enum kmemtrace_type_id { 80#undef TP_ARGS
175 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ 81#define TP_ARGS(args...) args
176 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
177 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
178};
179 82
180struct kmemtrace_alloc_entry { 83#undef FTRACE_ENTRY_DUP
181 struct trace_entry ent; 84#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
182 enum kmemtrace_type_id type_id;
183 unsigned long call_site;
184 const void *ptr;
185 size_t bytes_req;
186 size_t bytes_alloc;
187 gfp_t gfp_flags;
188 int node;
189};
190 85
191struct kmemtrace_free_entry { 86#include "trace_entries.h"
192 struct trace_entry ent;
193 enum kmemtrace_type_id type_id;
194 unsigned long call_site;
195 const void *ptr;
196};
197 87
88/*
89 * syscalls are special, and need special handling, this is why
90 * they are not included in trace_entries.h
91 */
198struct syscall_trace_enter { 92struct syscall_trace_enter {
199 struct trace_entry ent; 93 struct trace_entry ent;
200 int nr; 94 int nr;
@@ -207,13 +101,12 @@ struct syscall_trace_exit {
207 unsigned long ret; 101 unsigned long ret;
208}; 102};
209 103
210
211/* 104/*
212 * trace_flag_type is an enumeration that holds different 105 * trace_flag_type is an enumeration that holds different
213 * states when a trace occurs. These are: 106 * states when a trace occurs. These are:
214 * IRQS_OFF - interrupts were disabled 107 * IRQS_OFF - interrupts were disabled
215 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags 108 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
216 * NEED_RESCED - reschedule is requested 109 * NEED_RESCHED - reschedule is requested
217 * HARDIRQ - inside an interrupt handler 110 * HARDIRQ - inside an interrupt handler
218 * SOFTIRQ - inside a softirq handler 111 * SOFTIRQ - inside a softirq handler
219 */ 112 */
@@ -236,9 +129,6 @@ struct trace_array_cpu {
236 atomic_t disabled; 129 atomic_t disabled;
237 void *buffer_page; /* ring buffer spare */ 130 void *buffer_page; /* ring buffer spare */
238 131
239 /* these fields get copied into max-trace: */
240 unsigned long trace_idx;
241 unsigned long overrun;
242 unsigned long saved_latency; 132 unsigned long saved_latency;
243 unsigned long critical_start; 133 unsigned long critical_start;
244 unsigned long critical_end; 134 unsigned long critical_end;
@@ -246,6 +136,7 @@ struct trace_array_cpu {
246 unsigned long nice; 136 unsigned long nice;
247 unsigned long policy; 137 unsigned long policy;
248 unsigned long rt_priority; 138 unsigned long rt_priority;
139 unsigned long skipped_entries;
249 cycle_t preempt_timestamp; 140 cycle_t preempt_timestamp;
250 pid_t pid; 141 pid_t pid;
251 uid_t uid; 142 uid_t uid;
@@ -314,15 +205,10 @@ extern void __ftrace_bad_type(void);
314 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 205 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
315 TRACE_GRAPH_RET); \ 206 TRACE_GRAPH_RET); \
316 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 207 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
317 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
318 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ 208 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
319 TRACE_KMEM_ALLOC); \ 209 TRACE_KMEM_ALLOC); \
320 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
321 TRACE_KMEM_FREE); \ 211 TRACE_KMEM_FREE); \
322 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
323 TRACE_SYSCALL_ENTER); \
324 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
325 TRACE_SYSCALL_EXIT); \
326 __ftrace_bad_type(); \ 212 __ftrace_bad_type(); \
327 } while (0) 213 } while (0)
328 214
@@ -398,7 +284,6 @@ struct tracer {
398 struct tracer *next; 284 struct tracer *next;
399 int print_max; 285 int print_max;
400 struct tracer_flags *flags; 286 struct tracer_flags *flags;
401 struct tracer_stat *stats;
402}; 287};
403 288
404 289
@@ -423,12 +308,13 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
423 308
424struct ring_buffer_event; 309struct ring_buffer_event;
425 310
426struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 311struct ring_buffer_event *
427 int type, 312trace_buffer_lock_reserve(struct ring_buffer *buffer,
428 unsigned long len, 313 int type,
429 unsigned long flags, 314 unsigned long len,
430 int pc); 315 unsigned long flags,
431void trace_buffer_unlock_commit(struct trace_array *tr, 316 int pc);
317void trace_buffer_unlock_commit(struct ring_buffer *buffer,
432 struct ring_buffer_event *event, 318 struct ring_buffer_event *event,
433 unsigned long flags, int pc); 319 unsigned long flags, int pc);
434 320
@@ -438,10 +324,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
438struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 324struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
439 int *ent_cpu, u64 *ent_ts); 325 int *ent_cpu, u64 *ent_ts);
440 326
441void tracing_generic_entry_update(struct trace_entry *entry,
442 unsigned long flags,
443 int pc);
444
445void default_wait_pipe(struct trace_iterator *iter); 327void default_wait_pipe(struct trace_iterator *iter);
446void poll_wait_pipe(struct trace_iterator *iter); 328void poll_wait_pipe(struct trace_iterator *iter);
447 329
@@ -471,6 +353,7 @@ void trace_function(struct trace_array *tr,
471 353
472void trace_graph_return(struct ftrace_graph_ret *trace); 354void trace_graph_return(struct ftrace_graph_ret *trace);
473int trace_graph_entry(struct ftrace_graph_ent *trace); 355int trace_graph_entry(struct ftrace_graph_ent *trace);
356void set_graph_array(struct trace_array *tr);
474 357
475void tracing_start_cmdline_record(void); 358void tracing_start_cmdline_record(void);
476void tracing_stop_cmdline_record(void); 359void tracing_stop_cmdline_record(void);
@@ -479,35 +362,46 @@ void tracing_stop_sched_switch_record(void);
479void tracing_start_sched_switch_record(void); 362void tracing_start_sched_switch_record(void);
480int register_tracer(struct tracer *type); 363int register_tracer(struct tracer *type);
481void unregister_tracer(struct tracer *type); 364void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void);
482 366
483extern unsigned long nsecs_to_usecs(unsigned long nsecs); 367extern unsigned long nsecs_to_usecs(unsigned long nsecs);
484 368
369#ifdef CONFIG_TRACER_MAX_TRACE
485extern unsigned long tracing_max_latency; 370extern unsigned long tracing_max_latency;
486extern unsigned long tracing_thresh; 371extern unsigned long tracing_thresh;
487 372
488void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 373void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
489void update_max_tr_single(struct trace_array *tr, 374void update_max_tr_single(struct trace_array *tr,
490 struct task_struct *tsk, int cpu); 375 struct task_struct *tsk, int cpu);
376#endif /* CONFIG_TRACER_MAX_TRACE */
491 377
492void __trace_stack(struct trace_array *tr, 378#ifdef CONFIG_STACKTRACE
493 unsigned long flags, 379void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
494 int skip, int pc); 380 int skip, int pc);
495 381
496extern cycle_t ftrace_now(int cpu); 382void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
383 int pc);
497 384
498#ifdef CONFIG_CONTEXT_SWITCH_TRACER 385void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
499typedef void 386 int pc);
500(*tracer_switch_func_t)(void *private, 387#else
501 void *__rq, 388static inline void ftrace_trace_stack(struct trace_array *tr,
502 struct task_struct *prev, 389 unsigned long flags, int skip, int pc)
503 struct task_struct *next); 390{
504 391}
505struct tracer_switch_ops { 392
506 tracer_switch_func_t func; 393static inline void ftrace_trace_userstack(struct trace_array *tr,
507 void *private; 394 unsigned long flags, int pc)
508 struct tracer_switch_ops *next; 395{
509}; 396}
510#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 397
398static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
399 int skip, int pc)
400{
401}
402#endif /* CONFIG_STACKTRACE */
403
404extern cycle_t ftrace_now(int cpu);
511 405
512extern void trace_find_cmdline(int pid, char comm[]); 406extern void trace_find_cmdline(int pid, char comm[]);
513 407
@@ -517,6 +411,10 @@ extern unsigned long ftrace_update_tot_cnt;
517extern int DYN_FTRACE_TEST_NAME(void); 411extern int DYN_FTRACE_TEST_NAME(void);
518#endif 412#endif
519 413
414extern int ring_buffer_expanded;
415extern bool tracing_selftest_disabled;
416DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
417
520#ifdef CONFIG_FTRACE_STARTUP_TEST 418#ifdef CONFIG_FTRACE_STARTUP_TEST
521extern int trace_selftest_startup_function(struct tracer *trace, 419extern int trace_selftest_startup_function(struct tracer *trace,
522 struct trace_array *tr); 420 struct trace_array *tr);
@@ -548,9 +446,16 @@ extern int
548trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 446trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
549extern int 447extern int
550trace_vprintk(unsigned long ip, const char *fmt, va_list args); 448trace_vprintk(unsigned long ip, const char *fmt, va_list args);
449extern int
450trace_array_vprintk(struct trace_array *tr,
451 unsigned long ip, const char *fmt, va_list args);
452int trace_array_printk(struct trace_array *tr,
453 unsigned long ip, const char *fmt, ...);
551 454
552extern unsigned long trace_flags; 455extern unsigned long trace_flags;
553 456
457extern int trace_clock_id;
458
554/* Standard output formatting function used for function return traces */ 459/* Standard output formatting function used for function return traces */
555#ifdef CONFIG_FUNCTION_GRAPH_TRACER 460#ifdef CONFIG_FUNCTION_GRAPH_TRACER
556extern enum print_line_t print_graph_function(struct trace_iterator *iter); 461extern enum print_line_t print_graph_function(struct trace_iterator *iter);
@@ -613,6 +518,41 @@ static inline int ftrace_trace_task(struct task_struct *task)
613#endif 518#endif
614 519
615/* 520/*
 521 * struct trace_parser - serves for reading the user input separated by spaces
522 * @cont: set if the input is not complete - no final space char was found
523 * @buffer: holds the parsed user input
 524 * @idx: user input length
525 * @size: buffer size
526 */
527struct trace_parser {
528 bool cont;
529 char *buffer;
530 unsigned idx;
531 unsigned size;
532};
533
534static inline bool trace_parser_loaded(struct trace_parser *parser)
535{
536 return (parser->idx != 0);
537}
538
539static inline bool trace_parser_cont(struct trace_parser *parser)
540{
541 return parser->cont;
542}
543
544static inline void trace_parser_clear(struct trace_parser *parser)
545{
546 parser->cont = false;
547 parser->idx = 0;
548}
549
550extern int trace_parser_get_init(struct trace_parser *parser, int size);
551extern void trace_parser_put(struct trace_parser *parser);
552extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
553 size_t cnt, loff_t *ppos);
554
555/*
616 * trace_iterator_flags is an enumeration that defines bit 556 * trace_iterator_flags is an enumeration that defines bit
617 * positions into trace_flags that controls the output. 557 * positions into trace_flags that controls the output.
618 * 558 *
@@ -639,9 +579,8 @@ enum trace_iterator_flags {
639 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 579 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
640 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 580 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
641 TRACE_ITER_LATENCY_FMT = 0x40000, 581 TRACE_ITER_LATENCY_FMT = 0x40000,
642 TRACE_ITER_GLOBAL_CLK = 0x80000, 582 TRACE_ITER_SLEEP_TIME = 0x80000,
643 TRACE_ITER_SLEEP_TIME = 0x100000, 583 TRACE_ITER_GRAPH_TIME = 0x100000,
644 TRACE_ITER_GRAPH_TIME = 0x200000,
645}; 584};
646 585
647/* 586/*
@@ -738,6 +677,7 @@ struct ftrace_event_field {
738 struct list_head link; 677 struct list_head link;
739 char *name; 678 char *name;
740 char *type; 679 char *type;
680 int filter_type;
741 int offset; 681 int offset;
742 int size; 682 int size;
743 int is_signed; 683 int is_signed;
@@ -747,13 +687,15 @@ struct event_filter {
747 int n_preds; 687 int n_preds;
748 struct filter_pred **preds; 688 struct filter_pred **preds;
749 char *filter_string; 689 char *filter_string;
690 bool no_reset;
750}; 691};
751 692
752struct event_subsystem { 693struct event_subsystem {
753 struct list_head list; 694 struct list_head list;
754 const char *name; 695 const char *name;
755 struct dentry *entry; 696 struct dentry *entry;
756 void *filter; 697 struct event_filter *filter;
698 int nr_events;
757}; 699};
758 700
759struct filter_pred; 701struct filter_pred;
@@ -781,6 +723,7 @@ extern int apply_subsystem_event_filter(struct event_subsystem *system,
781 char *filter_string); 723 char *filter_string);
782extern void print_subsystem_event_filter(struct event_subsystem *system, 724extern void print_subsystem_event_filter(struct event_subsystem *system,
783 struct trace_seq *s); 725 struct trace_seq *s);
726extern int filter_assign_type(const char *type);
784 727
785static inline int 728static inline int
786filter_check_discard(struct ftrace_event_call *call, void *rec, 729filter_check_discard(struct ftrace_event_call *call, void *rec,
@@ -795,58 +738,18 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
795 return 0; 738 return 0;
796} 739}
797 740
798#define DEFINE_COMPARISON_PRED(type) \
799static int filter_pred_##type(struct filter_pred *pred, void *event, \
800 int val1, int val2) \
801{ \
802 type *addr = (type *)(event + pred->offset); \
803 type val = (type)pred->val; \
804 int match = 0; \
805 \
806 switch (pred->op) { \
807 case OP_LT: \
808 match = (*addr < val); \
809 break; \
810 case OP_LE: \
811 match = (*addr <= val); \
812 break; \
813 case OP_GT: \
814 match = (*addr > val); \
815 break; \
816 case OP_GE: \
817 match = (*addr >= val); \
818 break; \
819 default: \
820 break; \
821 } \
822 \
823 return match; \
824}
825
826#define DEFINE_EQUALITY_PRED(size) \
827static int filter_pred_##size(struct filter_pred *pred, void *event, \
828 int val1, int val2) \
829{ \
830 u##size *addr = (u##size *)(event + pred->offset); \
831 u##size val = (u##size)pred->val; \
832 int match; \
833 \
834 match = (val == *addr) ^ pred->not; \
835 \
836 return match; \
837}
838
839extern struct mutex event_mutex; 741extern struct mutex event_mutex;
840extern struct list_head ftrace_events; 742extern struct list_head ftrace_events;
841 743
842extern const char *__start___trace_bprintk_fmt[]; 744extern const char *__start___trace_bprintk_fmt[];
843extern const char *__stop___trace_bprintk_fmt[]; 745extern const char *__stop___trace_bprintk_fmt[];
844 746
845#undef TRACE_EVENT_FORMAT 747#undef FTRACE_ENTRY
846#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 748#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
847 extern struct ftrace_event_call event_##call; 749 extern struct ftrace_event_call event_##call;
848#undef TRACE_EVENT_FORMAT_NOFILTER 750#undef FTRACE_ENTRY_DUP
849#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) 751#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
850#include "trace_event_types.h" 752 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
753#include "trace_entries.h"
851 754
852#endif /* _LINUX_KERNEL_TRACE_H */ 755#endif /* _LINUX_KERNEL_TRACE_H */
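
The trace.h additions above declare a small trace_parser helper set (trace_parser_get_init(), trace_get_user(), trace_parser_loaded(), trace_parser_put()) for tokenizing writes to tracing control files. A hedged sketch of the write-handler shape they are meant for follows; the handler name and the 128-byte size are invented here, and the real consumer in this series is ftrace_event_write() in trace_events.c, further down this page.

/* Sketch only; assumes it is built inside kernel/trace/ next to trace.h. */
#include <linux/fs.h>
#include <linux/uaccess.h>
#include "trace.h"

static ssize_t example_write(struct file *filp, const char __user *ubuf,
			     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 128))	/* allocates parser.buffer */
		return -ENOMEM;

	/* Copies one whitespace-delimited token from user space;
	 * parser.cont stays set if no trailing space has been seen yet. */
	read = trace_get_user(&parser, ubuf, cnt, ppos);

	if (read >= 0 && trace_parser_loaded(&parser)) {
		parser.buffer[parser.idx] = 0;	/* NUL-terminate the token */
		/* ... act on parser.buffer here ... */
	}

	trace_parser_put(&parser);		/* frees parser.buffer */
	return read;
}
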
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a29ef23ffb47..c21d5f3956ad 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -41,14 +41,12 @@ void disable_boot_trace(void)
41 41
42static int boot_trace_init(struct trace_array *tr) 42static int boot_trace_init(struct trace_array *tr)
43{ 43{
44 int cpu;
45 boot_trace = tr; 44 boot_trace = tr;
46 45
47 if (!tr) 46 if (!tr)
48 return 0; 47 return 0;
49 48
50 for_each_cpu(cpu, cpu_possible_mask) 49 tracing_reset_online_cpus(tr);
51 tracing_reset(tr, cpu);
52 50
53 tracing_sched_switch_assign_trace(tr); 51 tracing_sched_switch_assign_trace(tr);
54 return 0; 52 return 0;
@@ -131,7 +129,9 @@ struct tracer boot_tracer __read_mostly =
131 129
132void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) 130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
133{ 131{
132 struct ftrace_event_call *call = &event_boot_call;
134 struct ring_buffer_event *event; 133 struct ring_buffer_event *event;
134 struct ring_buffer *buffer;
135 struct trace_boot_call *entry; 135 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace; 136 struct trace_array *tr = boot_trace;
137 137
@@ -144,20 +144,24 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
144 sprint_symbol(bt->func, (unsigned long)fn); 144 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable(); 145 preempt_disable();
146 146
147 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, 147 buffer = tr->buffer;
148 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
148 sizeof(*entry), 0, 0); 149 sizeof(*entry), 0, 0);
149 if (!event) 150 if (!event)
150 goto out; 151 goto out;
151 entry = ring_buffer_event_data(event); 152 entry = ring_buffer_event_data(event);
152 entry->boot_call = *bt; 153 entry->boot_call = *bt;
153 trace_buffer_unlock_commit(tr, event, 0, 0); 154 if (!filter_check_discard(call, entry, buffer, event))
155 trace_buffer_unlock_commit(buffer, event, 0, 0);
154 out: 156 out:
155 preempt_enable(); 157 preempt_enable();
156} 158}
157 159
158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) 160void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
159{ 161{
162 struct ftrace_event_call *call = &event_boot_ret;
160 struct ring_buffer_event *event; 163 struct ring_buffer_event *event;
164 struct ring_buffer *buffer;
161 struct trace_boot_ret *entry; 165 struct trace_boot_ret *entry;
162 struct trace_array *tr = boot_trace; 166 struct trace_array *tr = boot_trace;
163 167
@@ -167,13 +171,15 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
167 sprint_symbol(bt->func, (unsigned long)fn); 171 sprint_symbol(bt->func, (unsigned long)fn);
168 preempt_disable(); 172 preempt_disable();
169 173
170 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, 174 buffer = tr->buffer;
175 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
171 sizeof(*entry), 0, 0); 176 sizeof(*entry), 0, 0);
172 if (!event) 177 if (!event)
173 goto out; 178 goto out;
174 entry = ring_buffer_event_data(event); 179 entry = ring_buffer_event_data(event);
175 entry->boot_ret = *bt; 180 entry->boot_ret = *bt;
176 trace_buffer_unlock_commit(tr, event, 0, 0); 181 if (!filter_check_discard(call, entry, buffer, event))
182 trace_buffer_unlock_commit(buffer, event, 0, 0);
177 out: 183 out:
178 preempt_enable(); 184 preempt_enable();
179} 185}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index b588fd81f7f9..20c5f92e28a8 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -66,10 +66,14 @@ u64 notrace trace_clock(void)
66 * Used by plugins that need globally coherent timestamps. 66 * Used by plugins that need globally coherent timestamps.
67 */ 67 */
68 68
69static u64 prev_trace_clock_time; 69/* keep prev_time and lock in the same cacheline. */
70 70static struct {
71static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = 71 u64 prev_time;
72 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 72 raw_spinlock_t lock;
73} trace_clock_struct ____cacheline_aligned_in_smp =
74 {
75 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
76 };
73 77
74u64 notrace trace_clock_global(void) 78u64 notrace trace_clock_global(void)
75{ 79{
@@ -88,19 +92,19 @@ u64 notrace trace_clock_global(void)
88 if (unlikely(in_nmi())) 92 if (unlikely(in_nmi()))
89 goto out; 93 goto out;
90 94
91 __raw_spin_lock(&trace_clock_lock); 95 __raw_spin_lock(&trace_clock_struct.lock);
92 96
93 /* 97 /*
94 * TODO: if this happens often then maybe we should reset 98 * TODO: if this happens often then maybe we should reset
95 * my_scd->clock to prev_trace_clock_time+1, to make sure 99 * my_scd->clock to prev_time+1, to make sure
96 * we start ticking with the local clock from now on? 100 * we start ticking with the local clock from now on?
97 */ 101 */
98 if ((s64)(now - prev_trace_clock_time) < 0) 102 if ((s64)(now - trace_clock_struct.prev_time) < 0)
99 now = prev_trace_clock_time + 1; 103 now = trace_clock_struct.prev_time + 1;
100 104
101 prev_trace_clock_time = now; 105 trace_clock_struct.prev_time = now;
102 106
103 __raw_spin_unlock(&trace_clock_lock); 107 __raw_spin_unlock(&trace_clock_struct.lock);
104 108
105 out: 109 out:
106 raw_local_irq_restore(flags); 110 raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
new file mode 100644
index 000000000000..ead3d724599d
--- /dev/null
+++ b/kernel/trace/trace_entries.h
@@ -0,0 +1,366 @@
1/*
2 * This file defines the trace event structures that go into the ring
3 * buffer directly. They are created via macros so that changes for them
4 * appear in the format file. Using macros will automate this process.
5 *
6 * The macro used to create a ftrace data structure is:
7 *
8 * FTRACE_ENTRY( name, struct_name, id, structure, print )
9 *
 10 * @name: the name used as the event name, as well as the name of
11 * the directory that holds the format file.
12 *
13 * @struct_name: the name of the structure that is created.
14 *
15 * @id: The event identifier that is used to detect what event
16 * this is from the ring buffer.
17 *
18 * @structure: the structure layout
19 *
20 * - __field( type, item )
21 * This is equivalent to declaring
22 * type item;
23 * in the structure.
24 * - __array( type, item, size )
25 * This is equivalent to declaring
26 * type item[size];
27 * in the structure.
28 *
29 * * for structures within structures, the format of the internal
 30 * structure is laid out. This allows the internal structure
31 * to be deciphered for the format file. Although these macros
32 * may become out of sync with the internal structure, they
33 * will create a compile error if it happens. Since the
 34 * internal structures are just tracing helpers, this is not
35 * an issue.
36 *
37 * When an internal structure is used, it should use:
38 *
39 * __field_struct( type, item )
40 *
41 * instead of __field. This will prevent it from being shown in
 42 * the output file. The fields in the structure should use:
43 *
44 * __field_desc( type, container, item )
45 * __array_desc( type, container, item, len )
46 *
47 * type, item and len are the same as __field and __array, but
48 * container is added. This is the name of the item in
49 * __field_struct that this is describing.
50 *
51 *
52 * @print: the print format shown to users in the format file.
53 */
54
55/*
 56 * Function trace entry - function address and parent function address:
57 */
58FTRACE_ENTRY(function, ftrace_entry,
59
60 TRACE_FN,
61
62 F_STRUCT(
63 __field( unsigned long, ip )
64 __field( unsigned long, parent_ip )
65 ),
66
67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
68);
69
70/* Function call entry */
71FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
72
73 TRACE_GRAPH_ENT,
74
75 F_STRUCT(
76 __field_struct( struct ftrace_graph_ent, graph_ent )
77 __field_desc( unsigned long, graph_ent, func )
78 __field_desc( int, graph_ent, depth )
79 ),
80
81 F_printk("--> %lx (%d)", __entry->func, __entry->depth)
82);
83
84/* Function return entry */
85FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
86
87 TRACE_GRAPH_RET,
88
89 F_STRUCT(
90 __field_struct( struct ftrace_graph_ret, ret )
91 __field_desc( unsigned long, ret, func )
92 __field_desc( unsigned long long, ret, calltime)
93 __field_desc( unsigned long long, ret, rettime )
94 __field_desc( unsigned long, ret, overrun )
95 __field_desc( int, ret, depth )
96 ),
97
98 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
99 __entry->func, __entry->depth,
100 __entry->calltime, __entry->rettime,
101 __entry->depth)
102);
103
104/*
105 * Context switch trace entry - which task (and prio) we switched from/to:
106 *
107 * This is used for both wakeup and context switches. We only want
108 * to create one structure, but we need two outputs for it.
109 */
110#define FTRACE_CTX_FIELDS \
111 __field( unsigned int, prev_pid ) \
112 __field( unsigned char, prev_prio ) \
113 __field( unsigned char, prev_state ) \
114 __field( unsigned int, next_pid ) \
115 __field( unsigned char, next_prio ) \
116 __field( unsigned char, next_state ) \
117 __field( unsigned int, next_cpu )
118
119FTRACE_ENTRY(context_switch, ctx_switch_entry,
120
121 TRACE_CTX,
122
123 F_STRUCT(
124 FTRACE_CTX_FIELDS
125 ),
126
127 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
128 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
129 __entry->next_pid, __entry->next_prio, __entry->next_state,
130 __entry->next_cpu
131 )
132);
133
134/*
135 * FTRACE_ENTRY_DUP only creates the format file, it will not
136 * create another structure.
137 */
138FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
139
140 TRACE_WAKE,
141
142 F_STRUCT(
143 FTRACE_CTX_FIELDS
144 ),
145
146 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
147 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
148 __entry->next_pid, __entry->next_prio, __entry->next_state,
149 __entry->next_cpu
150 )
151);
152
153/*
154 * Special (free-form) trace entry:
155 */
156FTRACE_ENTRY(special, special_entry,
157
158 TRACE_SPECIAL,
159
160 F_STRUCT(
161 __field( unsigned long, arg1 )
162 __field( unsigned long, arg2 )
163 __field( unsigned long, arg3 )
164 ),
165
166 F_printk("(%08lx) (%08lx) (%08lx)",
167 __entry->arg1, __entry->arg2, __entry->arg3)
168);
169
170/*
171 * Stack-trace entry:
172 */
173
174#define FTRACE_STACK_ENTRIES 8
175
176FTRACE_ENTRY(kernel_stack, stack_entry,
177
178 TRACE_STACK,
179
180 F_STRUCT(
181 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
182 ),
183
184 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
185 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
186 __entry->caller[0], __entry->caller[1], __entry->caller[2],
187 __entry->caller[3], __entry->caller[4], __entry->caller[5],
188 __entry->caller[6], __entry->caller[7])
189);
190
191FTRACE_ENTRY(user_stack, userstack_entry,
192
193 TRACE_USER_STACK,
194
195 F_STRUCT(
196 __field( unsigned int, tgid )
197 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
198 ),
199
200 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
201 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
202 __entry->caller[0], __entry->caller[1], __entry->caller[2],
203 __entry->caller[3], __entry->caller[4], __entry->caller[5],
204 __entry->caller[6], __entry->caller[7])
205);
206
207/*
208 * trace_printk entry:
209 */
210FTRACE_ENTRY(bprint, bprint_entry,
211
212 TRACE_BPRINT,
213
214 F_STRUCT(
215 __field( unsigned long, ip )
216 __field( const char *, fmt )
217 __dynamic_array( u32, buf )
218 ),
219
220 F_printk("%08lx fmt:%p",
221 __entry->ip, __entry->fmt)
222);
223
224FTRACE_ENTRY(print, print_entry,
225
226 TRACE_PRINT,
227
228 F_STRUCT(
229 __field( unsigned long, ip )
230 __dynamic_array( char, buf )
231 ),
232
233 F_printk("%08lx %s",
234 __entry->ip, __entry->buf)
235);
236
237FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
238
239 TRACE_MMIO_RW,
240
241 F_STRUCT(
242 __field_struct( struct mmiotrace_rw, rw )
243 __field_desc( resource_size_t, rw, phys )
244 __field_desc( unsigned long, rw, value )
245 __field_desc( unsigned long, rw, pc )
246 __field_desc( int, rw, map_id )
247 __field_desc( unsigned char, rw, opcode )
248 __field_desc( unsigned char, rw, width )
249 ),
250
251 F_printk("%lx %lx %lx %d %x %x",
252 (unsigned long)__entry->phys, __entry->value, __entry->pc,
253 __entry->map_id, __entry->opcode, __entry->width)
254);
255
256FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
257
258 TRACE_MMIO_MAP,
259
260 F_STRUCT(
261 __field_struct( struct mmiotrace_map, map )
262 __field_desc( resource_size_t, map, phys )
263 __field_desc( unsigned long, map, virt )
264 __field_desc( unsigned long, map, len )
265 __field_desc( int, map, map_id )
266 __field_desc( unsigned char, map, opcode )
267 ),
268
269 F_printk("%lx %lx %lx %d %x",
270 (unsigned long)__entry->phys, __entry->virt, __entry->len,
271 __entry->map_id, __entry->opcode)
272);
273
274FTRACE_ENTRY(boot_call, trace_boot_call,
275
276 TRACE_BOOT_CALL,
277
278 F_STRUCT(
279 __field_struct( struct boot_trace_call, boot_call )
280 __field_desc( pid_t, boot_call, caller )
281 __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
282 ),
283
284 F_printk("%d %s", __entry->caller, __entry->func)
285);
286
287FTRACE_ENTRY(boot_ret, trace_boot_ret,
288
289 TRACE_BOOT_RET,
290
291 F_STRUCT(
292 __field_struct( struct boot_trace_ret, boot_ret )
293 __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
294 __field_desc( int, boot_ret, result )
295 __field_desc( unsigned long, boot_ret, duration )
296 ),
297
298 F_printk("%s %d %lx",
299 __entry->func, __entry->result, __entry->duration)
300);
301
302#define TRACE_FUNC_SIZE 30
303#define TRACE_FILE_SIZE 20
304
305FTRACE_ENTRY(branch, trace_branch,
306
307 TRACE_BRANCH,
308
309 F_STRUCT(
310 __field( unsigned int, line )
311 __array( char, func, TRACE_FUNC_SIZE+1 )
312 __array( char, file, TRACE_FILE_SIZE+1 )
313 __field( char, correct )
314 ),
315
316 F_printk("%u:%s:%s (%u)",
317 __entry->line,
318 __entry->func, __entry->file, __entry->correct)
319);
320
321FTRACE_ENTRY(hw_branch, hw_branch_entry,
322
323 TRACE_HW_BRANCHES,
324
325 F_STRUCT(
326 __field( u64, from )
327 __field( u64, to )
328 ),
329
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331);
332
333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
334
335 TRACE_KMEM_ALLOC,
336
337 F_STRUCT(
338 __field( enum kmemtrace_type_id, type_id )
339 __field( unsigned long, call_site )
340 __field( const void *, ptr )
341 __field( size_t, bytes_req )
342 __field( size_t, bytes_alloc )
343 __field( gfp_t, gfp_flags )
344 __field( int, node )
345 ),
346
347 F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
348 " flags:%x node:%d",
349 __entry->type_id, __entry->call_site, __entry->ptr,
350 __entry->bytes_req, __entry->bytes_alloc,
351 __entry->gfp_flags, __entry->node)
352);
353
354FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
355
356 TRACE_KMEM_FREE,
357
358 F_STRUCT(
359 __field( enum kmemtrace_type_id, type_id )
360 __field( unsigned long, call_site )
361 __field( const void *, ptr )
362 ),
363
364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr)
366);
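
The comment at the top of this new file describes the FTRACE_ENTRY() interface; combined with the __field()/__field_struct()/__array() definitions that trace.h installs before including trace_entries.h, each entry generates a plain C structure. As a sketch, the first entry above, FTRACE_ENTRY(function, ftrace_entry, TRACE_FN, ...), expands to the same structure the old trace.h used to spell out by hand:

struct ftrace_entry {
	struct trace_entry	ent;
	unsigned long		ip;
	unsigned long		parent_ip;
};

The second include of trace_entries.h at the bottom of trace.h, under the other FTRACE_ENTRY definition, turns the very same line into only "extern struct ftrace_event_call event_function;", which is how one entry feeds both the structure layout and the event declaration.
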
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 5b5895afecfe..dd44b8768867 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -5,8 +5,60 @@
5 * 5 *
6 */ 6 */
7 7
8#include <linux/module.h>
8#include "trace.h" 9#include "trace.h"
9 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16
17char *trace_profile_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf);
19
20char *trace_profile_buf_nmi;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22
23/* Count the events in use (per event id, not per instance) */
24static int total_profile_count;
25
26static int ftrace_profile_enable_event(struct ftrace_event_call *event)
27{
28 char *buf;
29 int ret = -ENOMEM;
30
31 if (atomic_inc_return(&event->profile_count))
32 return 0;
33
34 if (!total_profile_count++) {
35 buf = (char *)alloc_percpu(profile_buf_t);
36 if (!buf)
37 goto fail_buf;
38
39 rcu_assign_pointer(trace_profile_buf, buf);
40
41 buf = (char *)alloc_percpu(profile_buf_t);
42 if (!buf)
43 goto fail_buf_nmi;
44
45 rcu_assign_pointer(trace_profile_buf_nmi, buf);
46 }
47
48 ret = event->profile_enable();
49 if (!ret)
50 return 0;
51
52 kfree(trace_profile_buf_nmi);
53fail_buf_nmi:
54 kfree(trace_profile_buf);
55fail_buf:
56 total_profile_count--;
57 atomic_dec(&event->profile_count);
58
59 return ret;
60}
61
10int ftrace_profile_enable(int event_id) 62int ftrace_profile_enable(int event_id)
11{ 63{
12 struct ftrace_event_call *event; 64 struct ftrace_event_call *event;
@@ -14,8 +66,9 @@ int ftrace_profile_enable(int event_id)
14 66
15 mutex_lock(&event_mutex); 67 mutex_lock(&event_mutex);
16 list_for_each_entry(event, &ftrace_events, list) { 68 list_for_each_entry(event, &ftrace_events, list) {
17 if (event->id == event_id) { 69 if (event->id == event_id && event->profile_enable &&
18 ret = event->profile_enable(event); 70 try_module_get(event->mod)) {
71 ret = ftrace_profile_enable_event(event);
19 break; 72 break;
20 } 73 }
21 } 74 }
@@ -24,6 +77,33 @@ int ftrace_profile_enable(int event_id)
24 return ret; 77 return ret;
25} 78}
26 79
80static void ftrace_profile_disable_event(struct ftrace_event_call *event)
81{
82 char *buf, *nmi_buf;
83
84 if (!atomic_add_negative(-1, &event->profile_count))
85 return;
86
87 event->profile_disable();
88
89 if (!--total_profile_count) {
90 buf = trace_profile_buf;
91 rcu_assign_pointer(trace_profile_buf, NULL);
92
93 nmi_buf = trace_profile_buf_nmi;
94 rcu_assign_pointer(trace_profile_buf_nmi, NULL);
95
96 /*
 97 * Ensure all events in profiling have finished before
98 * releasing the buffers
99 */
100 synchronize_sched();
101
102 free_percpu(buf);
103 free_percpu(nmi_buf);
104 }
105}
106
27void ftrace_profile_disable(int event_id) 107void ftrace_profile_disable(int event_id)
28{ 108{
29 struct ftrace_event_call *event; 109 struct ftrace_event_call *event;
@@ -31,7 +111,8 @@ void ftrace_profile_disable(int event_id)
31 mutex_lock(&event_mutex); 111 mutex_lock(&event_mutex);
32 list_for_each_entry(event, &ftrace_events, list) { 112 list_for_each_entry(event, &ftrace_events, list) {
33 if (event->id == event_id) { 113 if (event->id == event_id) {
34 event->profile_disable(event); 114 ftrace_profile_disable_event(event);
115 module_put(event->mod);
35 break; 116 break;
36 } 117 }
37 } 118 }
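
ftrace_profile_enable_event() and ftrace_profile_disable_event() above treat event->profile_count as a reference count that starts at -1: the first enabler is the caller whose atomic_inc_return() comes back as zero (and so allocates the shared per-cpu buffers), and the last disabler is the caller whose atomic_add_negative(-1, ...) pushes the count back below zero. A small user-space model of that idiom, with names and messages of our own:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int profile_count = -1;	/* mirrors event->profile_count */

static void enable(void)
{
	/* Kernel form: if (atomic_inc_return(&count)) return; */
	if (atomic_fetch_add(&profile_count, 1) + 1 != 0)
		return;
	puts("first user: allocate buffers, call profile_enable()");
}

static void disable(void)
{
	/* Kernel form: if (!atomic_add_negative(-1, &count)) return; */
	if (atomic_fetch_sub(&profile_count, 1) - 1 >= 0)
		return;
	puts("last user: call profile_disable(), free buffers");
}

int main(void)
{
	enable();	/* prints: first user ... */
	enable();	/* nested enable, nothing to do */
	disable();	/* one user still left */
	disable();	/* prints: last user ... */
	return 0;
}
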
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
deleted file mode 100644
index 6db005e12487..000000000000
--- a/kernel/trace/trace_event_types.h
+++ /dev/null
@@ -1,178 +0,0 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ftrace
3
4/*
5 * We cheat and use the proto type field as the ID
6 * and args as the entry type (minus 'struct')
7 */
8TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore,
9 TRACE_STRUCT(
10 TRACE_FIELD(unsigned long, ip, ip)
11 TRACE_FIELD(unsigned long, parent_ip, parent_ip)
12 ),
13 TP_RAW_FMT(" %lx <-- %lx")
14);
15
16TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT,
17 ftrace_graph_ent_entry, ignore,
18 TRACE_STRUCT(
19 TRACE_FIELD(unsigned long, graph_ent.func, func)
20 TRACE_FIELD(int, graph_ent.depth, depth)
21 ),
22 TP_RAW_FMT("--> %lx (%d)")
23);
24
25TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(unsigned long long, ret.calltime, calltime)
30 TRACE_FIELD(unsigned long long, ret.rettime, rettime)
31 TRACE_FIELD(unsigned long, ret.overrun, overrun)
32 TRACE_FIELD(int, ret.depth, depth)
33 ),
34 TP_RAW_FMT("<-- %lx (%d)")
35);
36
37TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore,
38 TRACE_STRUCT(
39 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
40 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
41 TRACE_FIELD(unsigned char, prev_state, prev_state)
42 TRACE_FIELD(unsigned int, next_pid, next_pid)
43 TRACE_FIELD(unsigned char, next_prio, next_prio)
44 TRACE_FIELD(unsigned char, next_state, next_state)
45 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
46 ),
47 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
48);
49
50TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
51 TRACE_STRUCT(
52 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
53 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
54 TRACE_FIELD(unsigned char, prev_state, prev_state)
55 TRACE_FIELD(unsigned int, next_pid, next_pid)
56 TRACE_FIELD(unsigned char, next_prio, next_prio)
57 TRACE_FIELD(unsigned char, next_state, next_state)
58 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
59 ),
60 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
61);
62
63TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
64 TRACE_STRUCT(
65 TRACE_FIELD(unsigned long, arg1, arg1)
66 TRACE_FIELD(unsigned long, arg2, arg2)
67 TRACE_FIELD(unsigned long, arg3, arg3)
68 ),
69 TP_RAW_FMT("(%08lx) (%08lx) (%08lx)")
70);
71
72/*
73 * Stack-trace entry:
74 */
75
76/* #define FTRACE_STACK_ENTRIES 8 */
77
78TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore,
79 TRACE_STRUCT(
80 TRACE_FIELD(unsigned long, caller[0], stack0)
81 TRACE_FIELD(unsigned long, caller[1], stack1)
82 TRACE_FIELD(unsigned long, caller[2], stack2)
83 TRACE_FIELD(unsigned long, caller[3], stack3)
84 TRACE_FIELD(unsigned long, caller[4], stack4)
85 TRACE_FIELD(unsigned long, caller[5], stack5)
86 TRACE_FIELD(unsigned long, caller[6], stack6)
87 TRACE_FIELD(unsigned long, caller[7], stack7)
88 ),
89 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
90 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
91);
92
93TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
94 TRACE_STRUCT(
95 TRACE_FIELD(unsigned long, caller[0], stack0)
96 TRACE_FIELD(unsigned long, caller[1], stack1)
97 TRACE_FIELD(unsigned long, caller[2], stack2)
98 TRACE_FIELD(unsigned long, caller[3], stack3)
99 TRACE_FIELD(unsigned long, caller[4], stack4)
100 TRACE_FIELD(unsigned long, caller[5], stack5)
101 TRACE_FIELD(unsigned long, caller[6], stack6)
102 TRACE_FIELD(unsigned long, caller[7], stack7)
103 ),
104 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
105 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
106);
107
108TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
109 TRACE_STRUCT(
110 TRACE_FIELD(unsigned long, ip, ip)
111 TRACE_FIELD(char *, fmt, fmt)
112 TRACE_FIELD_ZERO_CHAR(buf)
113 ),
114 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
115);
116
117TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
118 TRACE_STRUCT(
119 TRACE_FIELD(unsigned long, ip, ip)
120 TRACE_FIELD_ZERO_CHAR(buf)
121 ),
122 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
123);
124
125TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
126 TRACE_STRUCT(
127 TRACE_FIELD(unsigned int, line, line)
128 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
129 TRACE_FUNC_SIZE+1, func)
130 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
131 TRACE_FUNC_SIZE+1, file)
132 TRACE_FIELD(char, correct, correct)
133 ),
134 TP_RAW_FMT("%u:%s:%s (%u)")
135);
136
137TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
138 TRACE_STRUCT(
139 TRACE_FIELD(u64, from, from)
140 TRACE_FIELD(u64, to, to)
141 ),
142 TP_RAW_FMT("from: %llx to: %llx")
143);
144
145TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
146 TRACE_STRUCT(
147 TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
148 TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
149 TRACE_FIELD(int, state_data.type, type)
150 TRACE_FIELD(int, state_data.state, state)
151 ),
152 TP_RAW_FMT("%llx->%llx type:%u state:%u")
153);
154
155TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore,
156 TRACE_STRUCT(
157 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
158 TRACE_FIELD(unsigned long, call_site, call_site)
159 TRACE_FIELD(const void *, ptr, ptr)
160 TRACE_FIELD(size_t, bytes_req, bytes_req)
161 TRACE_FIELD(size_t, bytes_alloc, bytes_alloc)
162 TRACE_FIELD(gfp_t, gfp_flags, gfp_flags)
163 TRACE_FIELD(int, node, node)
164 ),
165 TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu"
166 " flags:%x node:%d")
167);
168
169TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore,
170 TRACE_STRUCT(
171 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
172 TRACE_FIELD(unsigned long, call_site, call_site)
173 TRACE_FIELD(const void *, ptr, ptr)
174 ),
175 TP_RAW_FMT("type:%u call_site:%lx ptr:%p")
176);
177
178#undef TRACE_SYSTEM
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53c8fd376a88..d128f65778e6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -17,16 +17,20 @@
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19 19
20#include <asm/setup.h>
21
20#include "trace_output.h" 22#include "trace_output.h"
21 23
24#undef TRACE_SYSTEM
22#define TRACE_SYSTEM "TRACE_SYSTEM" 25#define TRACE_SYSTEM "TRACE_SYSTEM"
23 26
24DEFINE_MUTEX(event_mutex); 27DEFINE_MUTEX(event_mutex);
25 28
26LIST_HEAD(ftrace_events); 29LIST_HEAD(ftrace_events);
27 30
28int trace_define_field(struct ftrace_event_call *call, char *type, 31int trace_define_field(struct ftrace_event_call *call, const char *type,
29 char *name, int offset, int size, int is_signed) 32 const char *name, int offset, int size, int is_signed,
33 int filter_type)
30{ 34{
31 struct ftrace_event_field *field; 35 struct ftrace_event_field *field;
32 36
@@ -42,9 +46,15 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
42 if (!field->type) 46 if (!field->type)
43 goto err; 47 goto err;
44 48
49 if (filter_type == FILTER_OTHER)
50 field->filter_type = filter_assign_type(type);
51 else
52 field->filter_type = filter_type;
53
45 field->offset = offset; 54 field->offset = offset;
46 field->size = size; 55 field->size = size;
47 field->is_signed = is_signed; 56 field->is_signed = is_signed;
57
48 list_add(&field->link, &call->fields); 58 list_add(&field->link, &call->fields);
49 59
50 return 0; 60 return 0;
@@ -60,6 +70,29 @@ err:
60} 70}
61EXPORT_SYMBOL_GPL(trace_define_field); 71EXPORT_SYMBOL_GPL(trace_define_field);
62 72
73#define __common_field(type, item) \
74 ret = trace_define_field(call, #type, "common_" #item, \
75 offsetof(typeof(ent), item), \
76 sizeof(ent.item), \
77 is_signed_type(type), FILTER_OTHER); \
78 if (ret) \
79 return ret;
80
81int trace_define_common_fields(struct ftrace_event_call *call)
82{
83 int ret;
84 struct trace_entry ent;
85
86 __common_field(unsigned short, type);
87 __common_field(unsigned char, flags);
88 __common_field(unsigned char, preempt_count);
89 __common_field(int, pid);
90 __common_field(int, lock_depth);
91
92 return ret;
93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95
63#ifdef CONFIG_MODULES 96#ifdef CONFIG_MODULES
64 97
65static void trace_destroy_fields(struct ftrace_event_call *call) 98static void trace_destroy_fields(struct ftrace_event_call *call)
@@ -84,14 +117,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
84 if (call->enabled) { 117 if (call->enabled) {
85 call->enabled = 0; 118 call->enabled = 0;
86 tracing_stop_cmdline_record(); 119 tracing_stop_cmdline_record();
87 call->unregfunc(); 120 call->unregfunc(call->data);
88 } 121 }
89 break; 122 break;
90 case 1: 123 case 1:
91 if (!call->enabled) { 124 if (!call->enabled) {
92 call->enabled = 1; 125 call->enabled = 1;
93 tracing_start_cmdline_record(); 126 tracing_start_cmdline_record();
94 call->regfunc(); 127 call->regfunc(call->data);
95 } 128 }
96 break; 129 break;
97 } 130 }
@@ -198,73 +231,38 @@ static ssize_t
198ftrace_event_write(struct file *file, const char __user *ubuf, 231ftrace_event_write(struct file *file, const char __user *ubuf,
199 size_t cnt, loff_t *ppos) 232 size_t cnt, loff_t *ppos)
200{ 233{
201 size_t read = 0; 234 struct trace_parser parser;
202 int i, set = 1; 235 ssize_t read, ret;
203 ssize_t ret;
204 char *buf;
205 char ch;
206 236
207 if (!cnt || cnt < 0) 237 if (!cnt)
208 return 0; 238 return 0;
209 239
210 ret = tracing_update_buffers(); 240 ret = tracing_update_buffers();
211 if (ret < 0) 241 if (ret < 0)
212 return ret; 242 return ret;
213 243
214 ret = get_user(ch, ubuf++); 244 if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
215 if (ret)
216 return ret;
217 read++;
218 cnt--;
219
220 /* skip white space */
221 while (cnt && isspace(ch)) {
222 ret = get_user(ch, ubuf++);
223 if (ret)
224 return ret;
225 read++;
226 cnt--;
227 }
228
229 /* Only white space found? */
230 if (isspace(ch)) {
231 file->f_pos += read;
232 ret = read;
233 return ret;
234 }
235
236 buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
237 if (!buf)
238 return -ENOMEM; 245 return -ENOMEM;
239 246
240 if (cnt > EVENT_BUF_SIZE) 247 read = trace_get_user(&parser, ubuf, cnt, ppos);
241 cnt = EVENT_BUF_SIZE;
242 248
243 i = 0; 249 if (read >= 0 && trace_parser_loaded((&parser))) {
244 while (cnt && !isspace(ch)) { 250 int set = 1;
245 if (!i && ch == '!') 251
252 if (*parser.buffer == '!')
246 set = 0; 253 set = 0;
247 else
248 buf[i++] = ch;
249 254
250 ret = get_user(ch, ubuf++); 255 parser.buffer[parser.idx] = 0;
256
257 ret = ftrace_set_clr_event(parser.buffer + !set, set);
251 if (ret) 258 if (ret)
252 goto out_free; 259 goto out_put;
253 read++;
254 cnt--;
255 } 260 }
256 buf[i] = 0;
257
258 file->f_pos += read;
259
260 ret = ftrace_set_clr_event(buf, set);
261 if (ret)
262 goto out_free;
263 261
264 ret = read; 262 ret = read;
265 263
266 out_free: 264 out_put:
267 kfree(buf); 265 trace_parser_put(&parser);
268 266
269 return ret; 267 return ret;
270} 268}
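The rewritten ftrace_event_write() above hands all user-space copying and whitespace skipping to the trace_parser helpers. A minimal sketch of that usage pattern, using only the calls visible in this hunk:

    struct trace_parser parser;
    ssize_t read;

    if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
            return -ENOMEM;                    /* allocates parser.buffer */

    read = trace_get_user(&parser, ubuf, cnt, ppos);
    if (read >= 0 && trace_parser_loaded(&parser)) {
            parser.buffer[parser.idx] = 0;     /* NUL-terminate the token */
            /* act on parser.buffer, e.g. pass it to ftrace_set_clr_event() */
    }

    trace_parser_put(&parser);                 /* frees parser.buffer */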
@@ -272,42 +270,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
272static void * 270static void *
273t_next(struct seq_file *m, void *v, loff_t *pos) 271t_next(struct seq_file *m, void *v, loff_t *pos)
274{ 272{
275 struct list_head *list = m->private; 273 struct ftrace_event_call *call = v;
276 struct ftrace_event_call *call;
277 274
278 (*pos)++; 275 (*pos)++;
279 276
280 for (;;) { 277 list_for_each_entry_continue(call, &ftrace_events, list) {
281 if (list == &ftrace_events)
282 return NULL;
283
284 call = list_entry(list, struct ftrace_event_call, list);
285
286 /* 278 /*
287 * The ftrace subsystem is for showing formats only. 279 * The ftrace subsystem is for showing formats only.
288 * They can not be enabled or disabled via the event files. 280 * They can not be enabled or disabled via the event files.
289 */ 281 */
290 if (call->regfunc) 282 if (call->regfunc)
291 break; 283 return call;
292
293 list = list->next;
294 } 284 }
295 285
296 m->private = list->next; 286 return NULL;
297
298 return call;
299} 287}
300 288
301static void *t_start(struct seq_file *m, loff_t *pos) 289static void *t_start(struct seq_file *m, loff_t *pos)
302{ 290{
303 struct ftrace_event_call *call = NULL; 291 struct ftrace_event_call *call;
304 loff_t l; 292 loff_t l;
305 293
306 mutex_lock(&event_mutex); 294 mutex_lock(&event_mutex);
307 295
308 m->private = ftrace_events.next; 296 call = list_entry(&ftrace_events, struct ftrace_event_call, list);
309 for (l = 0; l <= *pos; ) { 297 for (l = 0; l <= *pos; ) {
310 call = t_next(m, NULL, &l); 298 call = t_next(m, call, &l);
311 if (!call) 299 if (!call)
312 break; 300 break;
313 } 301 }
@@ -317,37 +305,28 @@ static void *t_start(struct seq_file *m, loff_t *pos)
317static void * 305static void *
318s_next(struct seq_file *m, void *v, loff_t *pos) 306s_next(struct seq_file *m, void *v, loff_t *pos)
319{ 307{
320 struct list_head *list = m->private; 308 struct ftrace_event_call *call = v;
321 struct ftrace_event_call *call;
322 309
323 (*pos)++; 310 (*pos)++;
324 311
325 retry: 312 list_for_each_entry_continue(call, &ftrace_events, list) {
326 if (list == &ftrace_events) 313 if (call->enabled)
327 return NULL; 314 return call;
328
329 call = list_entry(list, struct ftrace_event_call, list);
330
331 if (!call->enabled) {
332 list = list->next;
333 goto retry;
334 } 315 }
335 316
336 m->private = list->next; 317 return NULL;
337
338 return call;
339} 318}
340 319
341static void *s_start(struct seq_file *m, loff_t *pos) 320static void *s_start(struct seq_file *m, loff_t *pos)
342{ 321{
343 struct ftrace_event_call *call = NULL; 322 struct ftrace_event_call *call;
344 loff_t l; 323 loff_t l;
345 324
346 mutex_lock(&event_mutex); 325 mutex_lock(&event_mutex);
347 326
348 m->private = ftrace_events.next; 327 call = list_entry(&ftrace_events, struct ftrace_event_call, list);
349 for (l = 0; l <= *pos; ) { 328 for (l = 0; l <= *pos; ) {
350 call = s_next(m, NULL, &l); 329 call = s_next(m, call, &l);
351 if (!call) 330 if (!call)
352 break; 331 break;
353 } 332 }
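Both iterator pairs above (t_start()/t_next() and s_start()/s_next()) now follow the same seq_file idiom: ->start seeds the cursor with list_entry() on the list head itself, and ->next advances with list_for_each_entry_continue(), which steps to the element after the cursor. A hedged sketch of the ->next half, with x_next standing in for the real function names:

    static void *x_next(struct seq_file *m, void *v, loff_t *pos)
    {
            struct ftrace_event_call *call = v;   /* current cursor */

            (*pos)++;

            /* resume the walk after 'call'; the head-based dummy cursor
             * from ->start makes the first step land on the first entry */
            list_for_each_entry_continue(call, &ftrace_events, list) {
                    if (call->regfunc)            /* per-entry filter condition */
                            return call;
            }

            return NULL;                          /* end of sequence */
    }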
@@ -376,7 +355,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
376 const struct seq_operations *seq_ops; 355 const struct seq_operations *seq_ops;
377 356
378 if ((file->f_mode & FMODE_WRITE) && 357 if ((file->f_mode & FMODE_WRITE) &&
379 !(file->f_flags & O_APPEND)) 358 (file->f_flags & O_TRUNC))
380 ftrace_clear_events(); 359 ftrace_clear_events();
381 360
382 seq_ops = inode->i_private; 361 seq_ops = inode->i_private;
@@ -546,7 +525,7 @@ static int trace_write_header(struct trace_seq *s)
546 FIELD(unsigned char, flags), 525 FIELD(unsigned char, flags),
547 FIELD(unsigned char, preempt_count), 526 FIELD(unsigned char, preempt_count),
548 FIELD(int, pid), 527 FIELD(int, pid),
549 FIELD(int, tgid)); 528 FIELD(int, lock_depth));
550} 529}
551 530
552static ssize_t 531static ssize_t
@@ -574,7 +553,7 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
574 trace_seq_printf(s, "format:\n"); 553 trace_seq_printf(s, "format:\n");
575 trace_write_header(s); 554 trace_write_header(s);
576 555
577 r = call->show_format(s); 556 r = call->show_format(call, s);
578 if (!r) { 557 if (!r) {
579 /* 558 /*
580 * ug! The format output is bigger than a PAGE!! 559 * ug! The format output is bigger than a PAGE!!
@@ -849,8 +828,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
849 828
850 /* First see if we did not already create this dir */ 829 /* First see if we did not already create this dir */
851 list_for_each_entry(system, &event_subsystems, list) { 830 list_for_each_entry(system, &event_subsystems, list) {
852 if (strcmp(system->name, name) == 0) 831 if (strcmp(system->name, name) == 0) {
832 system->nr_events++;
853 return system->entry; 833 return system->entry;
834 }
854 } 835 }
855 836
856 /* need to create new entry */ 837 /* need to create new entry */
@@ -869,6 +850,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
869 return d_events; 850 return d_events;
870 } 851 }
871 852
853 system->nr_events = 1;
872 system->name = kstrdup(name, GFP_KERNEL); 854 system->name = kstrdup(name, GFP_KERNEL);
873 if (!system->name) { 855 if (!system->name) {
874 debugfs_remove(system->entry); 856 debugfs_remove(system->entry);
@@ -920,15 +902,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
920 if (strcmp(call->system, TRACE_SYSTEM) != 0) 902 if (strcmp(call->system, TRACE_SYSTEM) != 0)
921 d_events = event_subsystem_dir(call->system, d_events); 903 d_events = event_subsystem_dir(call->system, d_events);
922 904
923 if (call->raw_init) {
924 ret = call->raw_init();
925 if (ret < 0) {
926 pr_warning("Could not initialize trace point"
927 " events/%s\n", call->name);
928 return ret;
929 }
930 }
931
932 call->dir = debugfs_create_dir(call->name, d_events); 905 call->dir = debugfs_create_dir(call->name, d_events);
933 if (!call->dir) { 906 if (!call->dir) {
934 pr_warning("Could not create debugfs " 907 pr_warning("Could not create debugfs "
@@ -940,12 +913,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
940 entry = trace_create_file("enable", 0644, call->dir, call, 913 entry = trace_create_file("enable", 0644, call->dir, call,
941 enable); 914 enable);
942 915
943 if (call->id) 916 if (call->id && call->profile_enable)
944 entry = trace_create_file("id", 0444, call->dir, call, 917 entry = trace_create_file("id", 0444, call->dir, call,
945 id); 918 id);
946 919
947 if (call->define_fields) { 920 if (call->define_fields) {
948 ret = call->define_fields(); 921 ret = call->define_fields(call);
949 if (ret < 0) { 922 if (ret < 0) {
950 pr_warning("Could not initialize trace point" 923 pr_warning("Could not initialize trace point"
951 " events/%s\n", call->name); 924 " events/%s\n", call->name);
@@ -987,6 +960,32 @@ struct ftrace_module_file_ops {
987 struct file_operations filter; 960 struct file_operations filter;
988}; 961};
989 962
963static void remove_subsystem_dir(const char *name)
964{
965 struct event_subsystem *system;
966
967 if (strcmp(name, TRACE_SYSTEM) == 0)
968 return;
969
970 list_for_each_entry(system, &event_subsystems, list) {
971 if (strcmp(system->name, name) == 0) {
972 if (!--system->nr_events) {
973 struct event_filter *filter = system->filter;
974
975 debugfs_remove_recursive(system->entry);
976 list_del(&system->list);
977 if (filter) {
978 kfree(filter->filter_string);
979 kfree(filter);
980 }
981 kfree(system->name);
982 kfree(system);
983 }
984 break;
985 }
986 }
987}
988
990static struct ftrace_module_file_ops * 989static struct ftrace_module_file_ops *
991trace_create_file_ops(struct module *mod) 990trace_create_file_ops(struct module *mod)
992{ 991{
@@ -1027,6 +1026,7 @@ static void trace_module_add_events(struct module *mod)
1027 struct ftrace_module_file_ops *file_ops = NULL; 1026 struct ftrace_module_file_ops *file_ops = NULL;
1028 struct ftrace_event_call *call, *start, *end; 1027 struct ftrace_event_call *call, *start, *end;
1029 struct dentry *d_events; 1028 struct dentry *d_events;
1029 int ret;
1030 1030
1031 start = mod->trace_events; 1031 start = mod->trace_events;
1032 end = mod->trace_events + mod->num_trace_events; 1032 end = mod->trace_events + mod->num_trace_events;
@@ -1042,7 +1042,15 @@ static void trace_module_add_events(struct module *mod)
1042 /* The linker may leave blanks */ 1042 /* The linker may leave blanks */
1043 if (!call->name) 1043 if (!call->name)
1044 continue; 1044 continue;
1045 1045 if (call->raw_init) {
1046 ret = call->raw_init();
1047 if (ret < 0) {
1048 if (ret != -ENOSYS)
1049 pr_warning("Could not initialize trace "
1050 "point events/%s\n", call->name);
1051 continue;
1052 }
1053 }
1046 /* 1054 /*
1047 * This module has events, create file ops for this module 1055 * This module has events, create file ops for this module
1048 * if not already done. 1056 * if not already done.
@@ -1077,6 +1085,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_del(&call->list); 1085 list_del(&call->list);
1078 trace_destroy_fields(call); 1086 trace_destroy_fields(call);
1079 destroy_preds(call); 1087 destroy_preds(call);
1088 remove_subsystem_dir(call->system);
1080 } 1089 }
1081 } 1090 }
1082 1091
@@ -1125,7 +1134,7 @@ static int trace_module_notify(struct notifier_block *self,
1125} 1134}
1126#endif /* CONFIG_MODULES */ 1135#endif /* CONFIG_MODULES */
1127 1136
1128struct notifier_block trace_module_nb = { 1137static struct notifier_block trace_module_nb = {
1129 .notifier_call = trace_module_notify, 1138 .notifier_call = trace_module_notify,
1130 .priority = 0, 1139 .priority = 0,
1131}; 1140};
@@ -1133,6 +1142,18 @@ struct notifier_block trace_module_nb = {
1133extern struct ftrace_event_call __start_ftrace_events[]; 1142extern struct ftrace_event_call __start_ftrace_events[];
1134extern struct ftrace_event_call __stop_ftrace_events[]; 1143extern struct ftrace_event_call __stop_ftrace_events[];
1135 1144
1145static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1146
1147static __init int setup_trace_event(char *str)
1148{
1149 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1150 ring_buffer_expanded = 1;
1151 tracing_selftest_disabled = 1;
1152
1153 return 1;
1154}
1155__setup("trace_event=", setup_trace_event);
1156
1136static __init int event_trace_init(void) 1157static __init int event_trace_init(void)
1137{ 1158{
1138 struct ftrace_event_call *call; 1159 struct ftrace_event_call *call;
@@ -1140,6 +1161,8 @@ static __init int event_trace_init(void)
1140 struct dentry *entry; 1161 struct dentry *entry;
1141 struct dentry *d_events; 1162 struct dentry *d_events;
1142 int ret; 1163 int ret;
1164 char *buf = bootup_event_buf;
1165 char *token;
1143 1166
1144 d_tracer = tracing_init_dentry(); 1167 d_tracer = tracing_init_dentry();
1145 if (!d_tracer) 1168 if (!d_tracer)
@@ -1179,12 +1202,34 @@ static __init int event_trace_init(void)
1179 /* The linker may leave blanks */ 1202 /* The linker may leave blanks */
1180 if (!call->name) 1203 if (!call->name)
1181 continue; 1204 continue;
1205 if (call->raw_init) {
1206 ret = call->raw_init();
1207 if (ret < 0) {
1208 if (ret != -ENOSYS)
1209 pr_warning("Could not initialize trace "
1210 "point events/%s\n", call->name);
1211 continue;
1212 }
1213 }
1182 list_add(&call->list, &ftrace_events); 1214 list_add(&call->list, &ftrace_events);
1183 event_create_dir(call, d_events, &ftrace_event_id_fops, 1215 event_create_dir(call, d_events, &ftrace_event_id_fops,
1184 &ftrace_enable_fops, &ftrace_event_filter_fops, 1216 &ftrace_enable_fops, &ftrace_event_filter_fops,
1185 &ftrace_event_format_fops); 1217 &ftrace_event_format_fops);
1186 } 1218 }
1187 1219
1220 while (true) {
1221 token = strsep(&buf, ",");
1222
1223 if (!token)
1224 break;
1225 if (!*token)
1226 continue;
1227
1228 ret = ftrace_set_clr_event(token, 1);
1229 if (ret)
1230 pr_warning("Failed to enable trace event: %s\n", token);
1231 }
1232
1188 ret = register_module_notifier(&trace_module_nb); 1233 ret = register_module_notifier(&trace_module_nb);
1189 if (ret) 1234 if (ret)
1190 pr_warning("Failed to register trace events module notifier\n"); 1235 pr_warning("Failed to register trace events module notifier\n");
@@ -1261,6 +1306,18 @@ static __init void event_trace_self_tests(void)
1261 if (!call->regfunc) 1306 if (!call->regfunc)
1262 continue; 1307 continue;
1263 1308
1309/*
 1310 * Testing syscall events here is of limited use, but we
 1311 * still do it if configured. It is, however, time consuming.
1312 * What we really need is a user thread to perform the
1313 * syscalls as we test.
1314 */
1315#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1316 if (call->system &&
1317 strcmp(call->system, "syscalls") == 0)
1318 continue;
1319#endif
1320
1264 pr_info("Testing event %s: ", call->name); 1321 pr_info("Testing event %s: ", call->name);
1265 1322
1266 /* 1323 /*
@@ -1334,12 +1391,13 @@ static __init void event_trace_self_tests(void)
1334 1391
1335#ifdef CONFIG_FUNCTION_TRACER 1392#ifdef CONFIG_FUNCTION_TRACER
1336 1393
1337static DEFINE_PER_CPU(atomic_t, test_event_disable); 1394static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1338 1395
1339static void 1396static void
1340function_test_events_call(unsigned long ip, unsigned long parent_ip) 1397function_test_events_call(unsigned long ip, unsigned long parent_ip)
1341{ 1398{
1342 struct ring_buffer_event *event; 1399 struct ring_buffer_event *event;
1400 struct ring_buffer *buffer;
1343 struct ftrace_entry *entry; 1401 struct ftrace_entry *entry;
1344 unsigned long flags; 1402 unsigned long flags;
1345 long disabled; 1403 long disabled;
@@ -1350,14 +1408,15 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1350 pc = preempt_count(); 1408 pc = preempt_count();
1351 resched = ftrace_preempt_disable(); 1409 resched = ftrace_preempt_disable();
1352 cpu = raw_smp_processor_id(); 1410 cpu = raw_smp_processor_id();
1353 disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); 1411 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1354 1412
1355 if (disabled != 1) 1413 if (disabled != 1)
1356 goto out; 1414 goto out;
1357 1415
1358 local_save_flags(flags); 1416 local_save_flags(flags);
1359 1417
1360 event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), 1418 event = trace_current_buffer_lock_reserve(&buffer,
1419 TRACE_FN, sizeof(*entry),
1361 flags, pc); 1420 flags, pc);
1362 if (!event) 1421 if (!event)
1363 goto out; 1422 goto out;
@@ -1365,10 +1424,10 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1365 entry->ip = ip; 1424 entry->ip = ip;
1366 entry->parent_ip = parent_ip; 1425 entry->parent_ip = parent_ip;
1367 1426
1368 trace_nowake_buffer_unlock_commit(event, flags, pc); 1427 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1369 1428
1370 out: 1429 out:
1371 atomic_dec(&per_cpu(test_event_disable, cpu)); 1430 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1372 ftrace_preempt_enable(resched); 1431 ftrace_preempt_enable(resched);
1373} 1432}
1374 1433
@@ -1392,10 +1451,10 @@ static __init void event_trace_self_test_with_function(void)
1392 1451
1393static __init int event_trace_self_tests_init(void) 1452static __init int event_trace_self_tests_init(void)
1394{ 1453{
1395 1454 if (!tracing_selftest_disabled) {
1396 event_trace_self_tests(); 1455 event_trace_self_tests();
1397 1456 event_trace_self_test_with_function();
1398 event_trace_self_test_with_function(); 1457 }
1399 1458
1400 return 0; 1459 return 0;
1401} 1460}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 936c621bbf46..23245785927f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -121,6 +121,47 @@ struct filter_parse_state {
121 } operand; 121 } operand;
122}; 122};
123 123
124#define DEFINE_COMPARISON_PRED(type) \
125static int filter_pred_##type(struct filter_pred *pred, void *event, \
126 int val1, int val2) \
127{ \
128 type *addr = (type *)(event + pred->offset); \
129 type val = (type)pred->val; \
130 int match = 0; \
131 \
132 switch (pred->op) { \
133 case OP_LT: \
134 match = (*addr < val); \
135 break; \
136 case OP_LE: \
137 match = (*addr <= val); \
138 break; \
139 case OP_GT: \
140 match = (*addr > val); \
141 break; \
142 case OP_GE: \
143 match = (*addr >= val); \
144 break; \
145 default: \
146 break; \
147 } \
148 \
149 return match; \
150}
151
152#define DEFINE_EQUALITY_PRED(size) \
153static int filter_pred_##size(struct filter_pred *pred, void *event, \
154 int val1, int val2) \
155{ \
156 u##size *addr = (u##size *)(event + pred->offset); \
157 u##size val = (u##size)pred->val; \
158 int match; \
159 \
160 match = (val == *addr) ^ pred->not; \
161 \
162 return match; \
163}
164
124DEFINE_COMPARISON_PRED(s64); 165DEFINE_COMPARISON_PRED(s64);
125DEFINE_COMPARISON_PRED(u64); 166DEFINE_COMPARISON_PRED(u64);
126DEFINE_COMPARISON_PRED(s32); 167DEFINE_COMPARISON_PRED(s32);
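To make the new predicate macros easier to follow, DEFINE_COMPARISON_PRED(s64) above expands to roughly this function (illustrative expansion, whitespace added):

    static int filter_pred_s64(struct filter_pred *pred, void *event,
                               int val1, int val2)
    {
            s64 *addr = (s64 *)(event + pred->offset);
            s64 val = (s64)pred->val;
            int match = 0;

            switch (pred->op) {
            case OP_LT: match = (*addr < val);  break;
            case OP_LE: match = (*addr <= val); break;
            case OP_GT: match = (*addr > val);  break;
            case OP_GE: match = (*addr >= val); break;
            default:                            break;
            }

            return match;
    }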
@@ -163,6 +204,20 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
163 return match; 204 return match;
164} 205}
165 206
207/* Filter predicate for char * pointers */
208static int filter_pred_pchar(struct filter_pred *pred, void *event,
209 int val1, int val2)
210{
211 char **addr = (char **)(event + pred->offset);
212 int cmp, match;
213
214 cmp = strncmp(*addr, pred->str_val, pred->str_len);
215
216 match = (!cmp) ^ pred->not;
217
218 return match;
219}
220
166/* 221/*
167 * Filter predicate for dynamic sized arrays of characters. 222 * Filter predicate for dynamic sized arrays of characters.
168 * These are implemented through a list of strings at the end 223 * These are implemented through a list of strings at the end
@@ -176,11 +231,13 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
176static int filter_pred_strloc(struct filter_pred *pred, void *event, 231static int filter_pred_strloc(struct filter_pred *pred, void *event,
177 int val1, int val2) 232 int val1, int val2)
178{ 233{
179 unsigned short str_loc = *(unsigned short *)(event + pred->offset); 234 u32 str_item = *(u32 *)(event + pred->offset);
235 int str_loc = str_item & 0xffff;
236 int str_len = str_item >> 16;
180 char *addr = (char *)(event + str_loc); 237 char *addr = (char *)(event + str_loc);
181 int cmp, match; 238 int cmp, match;
182 239
183 cmp = strncmp(addr, pred->str_val, pred->str_len); 240 cmp = strncmp(addr, pred->str_val, str_len);
184 241
185 match = (!cmp) ^ pred->not; 242 match = (!cmp) ^ pred->not;
186 243
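filter_pred_strloc() above depends on the __data_loc encoding used by dynamic event fields: one u32 holding the payload offset in its low 16 bits and the payload length in its high 16 bits. A minimal sketch of packing and unpacking that word (the helper names are illustrative, not kernel API):

    /* length in the high 16 bits, offset within the event in the low 16 bits */
    static inline u32 data_loc_pack(unsigned int offset, unsigned int len)
    {
            return (len << 16) | (offset & 0xffff);
    }

    static inline void data_loc_unpack(u32 item, int *offset, int *len)
    {
            *offset = item & 0xffff;   /* where the string starts */
            *len    = item >> 16;      /* how many bytes it occupies */
    }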
@@ -293,7 +350,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
293 struct event_filter *filter = call->filter; 350 struct event_filter *filter = call->filter;
294 351
295 mutex_lock(&event_mutex); 352 mutex_lock(&event_mutex);
296 if (filter->filter_string) 353 if (filter && filter->filter_string)
297 trace_seq_printf(s, "%s\n", filter->filter_string); 354 trace_seq_printf(s, "%s\n", filter->filter_string);
298 else 355 else
299 trace_seq_printf(s, "none\n"); 356 trace_seq_printf(s, "none\n");
@@ -306,7 +363,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
306 struct event_filter *filter = system->filter; 363 struct event_filter *filter = system->filter;
307 364
308 mutex_lock(&event_mutex); 365 mutex_lock(&event_mutex);
309 if (filter->filter_string) 366 if (filter && filter->filter_string)
310 trace_seq_printf(s, "%s\n", filter->filter_string); 367 trace_seq_printf(s, "%s\n", filter->filter_string);
311 else 368 else
312 trace_seq_printf(s, "none\n"); 369 trace_seq_printf(s, "none\n");
@@ -374,6 +431,9 @@ void destroy_preds(struct ftrace_event_call *call)
374 struct event_filter *filter = call->filter; 431 struct event_filter *filter = call->filter;
375 int i; 432 int i;
376 433
434 if (!filter)
435 return;
436
377 for (i = 0; i < MAX_FILTER_PRED; i++) { 437 for (i = 0; i < MAX_FILTER_PRED; i++) {
378 if (filter->preds[i]) 438 if (filter->preds[i])
379 filter_free_pred(filter->preds[i]); 439 filter_free_pred(filter->preds[i]);
@@ -384,17 +444,19 @@ void destroy_preds(struct ftrace_event_call *call)
384 call->filter = NULL; 444 call->filter = NULL;
385} 445}
386 446
387int init_preds(struct ftrace_event_call *call) 447static int init_preds(struct ftrace_event_call *call)
388{ 448{
389 struct event_filter *filter; 449 struct event_filter *filter;
390 struct filter_pred *pred; 450 struct filter_pred *pred;
391 int i; 451 int i;
392 452
453 if (call->filter)
454 return 0;
455
393 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); 456 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
394 if (!call->filter) 457 if (!call->filter)
395 return -ENOMEM; 458 return -ENOMEM;
396 459
397 call->filter_active = 0;
398 filter->n_preds = 0; 460 filter->n_preds = 0;
399 461
400 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); 462 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
@@ -416,30 +478,55 @@ oom:
416 478
417 return -ENOMEM; 479 return -ENOMEM;
418} 480}
419EXPORT_SYMBOL_GPL(init_preds);
420 481
421static void filter_free_subsystem_preds(struct event_subsystem *system) 482static int init_subsystem_preds(struct event_subsystem *system)
422{ 483{
423 struct event_filter *filter = system->filter;
424 struct ftrace_event_call *call; 484 struct ftrace_event_call *call;
425 int i; 485 int err;
426 486
427 if (filter->n_preds) { 487 list_for_each_entry(call, &ftrace_events, list) {
428 for (i = 0; i < filter->n_preds; i++) 488 if (!call->define_fields)
429 filter_free_pred(filter->preds[i]); 489 continue;
430 kfree(filter->preds); 490
431 filter->preds = NULL; 491 if (strcmp(call->system, system->name) != 0)
432 filter->n_preds = 0; 492 continue;
493
494 err = init_preds(call);
495 if (err)
496 return err;
433 } 497 }
434 498
499 return 0;
500}
501
502enum {
503 FILTER_DISABLE_ALL,
504 FILTER_INIT_NO_RESET,
505 FILTER_SKIP_NO_RESET,
506};
507
508static void filter_free_subsystem_preds(struct event_subsystem *system,
509 int flag)
510{
511 struct ftrace_event_call *call;
512
435 list_for_each_entry(call, &ftrace_events, list) { 513 list_for_each_entry(call, &ftrace_events, list) {
436 if (!call->define_fields) 514 if (!call->define_fields)
437 continue; 515 continue;
438 516
439 if (!strcmp(call->system, system->name)) { 517 if (strcmp(call->system, system->name) != 0)
440 filter_disable_preds(call); 518 continue;
441 remove_filter_string(call->filter); 519
520 if (flag == FILTER_INIT_NO_RESET) {
521 call->filter->no_reset = false;
522 continue;
442 } 523 }
524
525 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
526 continue;
527
528 filter_disable_preds(call);
529 remove_filter_string(call->filter);
443 } 530 }
444} 531}
445 532
@@ -468,12 +555,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
468 return 0; 555 return 0;
469} 556}
470 557
471enum { 558int filter_assign_type(const char *type)
472 FILTER_STATIC_STRING = 1,
473 FILTER_DYN_STRING
474};
475
476static int is_string_field(const char *type)
477{ 559{
478 if (strstr(type, "__data_loc") && strstr(type, "char")) 560 if (strstr(type, "__data_loc") && strstr(type, "char"))
479 return FILTER_DYN_STRING; 561 return FILTER_DYN_STRING;
@@ -481,12 +563,19 @@ static int is_string_field(const char *type)
481 if (strchr(type, '[') && strstr(type, "char")) 563 if (strchr(type, '[') && strstr(type, "char"))
482 return FILTER_STATIC_STRING; 564 return FILTER_STATIC_STRING;
483 565
484 return 0; 566 return FILTER_OTHER;
567}
568
569static bool is_string_field(struct ftrace_event_field *field)
570{
571 return field->filter_type == FILTER_DYN_STRING ||
572 field->filter_type == FILTER_STATIC_STRING ||
573 field->filter_type == FILTER_PTR_STRING;
485} 574}
486 575
487static int is_legal_op(struct ftrace_event_field *field, int op) 576static int is_legal_op(struct ftrace_event_field *field, int op)
488{ 577{
489 if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) 578 if (is_string_field(field) && (op != OP_EQ && op != OP_NE))
490 return 0; 579 return 0;
491 580
492 return 1; 581 return 1;
@@ -537,22 +626,24 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
537 626
538static int filter_add_pred(struct filter_parse_state *ps, 627static int filter_add_pred(struct filter_parse_state *ps,
539 struct ftrace_event_call *call, 628 struct ftrace_event_call *call,
540 struct filter_pred *pred) 629 struct filter_pred *pred,
630 bool dry_run)
541{ 631{
542 struct ftrace_event_field *field; 632 struct ftrace_event_field *field;
543 filter_pred_fn_t fn; 633 filter_pred_fn_t fn;
544 unsigned long long val; 634 unsigned long long val;
545 int string_type;
546 int ret; 635 int ret;
547 636
548 pred->fn = filter_pred_none; 637 pred->fn = filter_pred_none;
549 638
550 if (pred->op == OP_AND) { 639 if (pred->op == OP_AND) {
551 pred->pop_n = 2; 640 pred->pop_n = 2;
552 return filter_add_pred_fn(ps, call, pred, filter_pred_and); 641 fn = filter_pred_and;
642 goto add_pred_fn;
553 } else if (pred->op == OP_OR) { 643 } else if (pred->op == OP_OR) {
554 pred->pop_n = 2; 644 pred->pop_n = 2;
555 return filter_add_pred_fn(ps, call, pred, filter_pred_or); 645 fn = filter_pred_or;
646 goto add_pred_fn;
556 } 647 }
557 648
558 field = find_event_field(call, pred->field_name); 649 field = find_event_field(call, pred->field_name);
@@ -568,16 +659,17 @@ static int filter_add_pred(struct filter_parse_state *ps,
568 return -EINVAL; 659 return -EINVAL;
569 } 660 }
570 661
571 string_type = is_string_field(field->type); 662 if (is_string_field(field)) {
572 if (string_type) { 663 pred->str_len = field->size;
573 if (string_type == FILTER_STATIC_STRING) 664
665 if (field->filter_type == FILTER_STATIC_STRING)
574 fn = filter_pred_string; 666 fn = filter_pred_string;
575 else 667 else if (field->filter_type == FILTER_DYN_STRING)
576 fn = filter_pred_strloc; 668 fn = filter_pred_strloc;
577 pred->str_len = field->size; 669 else {
578 if (pred->op == OP_NE) 670 fn = filter_pred_pchar;
579 pred->not = 1; 671 pred->str_len = strlen(pred->str_val);
580 return filter_add_pred_fn(ps, call, pred, fn); 672 }
581 } else { 673 } else {
582 if (field->is_signed) 674 if (field->is_signed)
583 ret = strict_strtoll(pred->str_val, 0, &val); 675 ret = strict_strtoll(pred->str_val, 0, &val);
@@ -588,44 +680,33 @@ static int filter_add_pred(struct filter_parse_state *ps,
588 return -EINVAL; 680 return -EINVAL;
589 } 681 }
590 pred->val = val; 682 pred->val = val;
591 }
592 683
593 fn = select_comparison_fn(pred->op, field->size, field->is_signed); 684 fn = select_comparison_fn(pred->op, field->size,
594 if (!fn) { 685 field->is_signed);
595 parse_error(ps, FILT_ERR_INVALID_OP, 0); 686 if (!fn) {
596 return -EINVAL; 687 parse_error(ps, FILT_ERR_INVALID_OP, 0);
688 return -EINVAL;
689 }
597 } 690 }
598 691
599 if (pred->op == OP_NE) 692 if (pred->op == OP_NE)
600 pred->not = 1; 693 pred->not = 1;
601 694
602 return filter_add_pred_fn(ps, call, pred, fn); 695add_pred_fn:
696 if (!dry_run)
697 return filter_add_pred_fn(ps, call, pred, fn);
698 return 0;
603} 699}
604 700
605static int filter_add_subsystem_pred(struct filter_parse_state *ps, 701static int filter_add_subsystem_pred(struct filter_parse_state *ps,
606 struct event_subsystem *system, 702 struct event_subsystem *system,
607 struct filter_pred *pred, 703 struct filter_pred *pred,
608 char *filter_string) 704 char *filter_string,
705 bool dry_run)
609{ 706{
610 struct event_filter *filter = system->filter;
611 struct ftrace_event_call *call; 707 struct ftrace_event_call *call;
612 int err = 0; 708 int err = 0;
613 709 bool fail = true;
614 if (!filter->preds) {
615 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
616 GFP_KERNEL);
617
618 if (!filter->preds)
619 return -ENOMEM;
620 }
621
622 if (filter->n_preds == MAX_FILTER_PRED) {
623 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
624 return -ENOSPC;
625 }
626
627 filter->preds[filter->n_preds] = pred;
628 filter->n_preds++;
629 710
630 list_for_each_entry(call, &ftrace_events, list) { 711 list_for_each_entry(call, &ftrace_events, list) {
631 712
@@ -635,16 +716,24 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
635 if (strcmp(call->system, system->name)) 716 if (strcmp(call->system, system->name))
636 continue; 717 continue;
637 718
638 err = filter_add_pred(ps, call, pred); 719 if (call->filter->no_reset)
639 if (err) { 720 continue;
640 filter_free_subsystem_preds(system); 721
641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 722 err = filter_add_pred(ps, call, pred, dry_run);
642 goto out; 723 if (err)
643 } 724 call->filter->no_reset = true;
644 replace_filter_string(call->filter, filter_string); 725 else
726 fail = false;
727
728 if (!dry_run)
729 replace_filter_string(call->filter, filter_string);
645 } 730 }
646out: 731
647 return err; 732 if (fail) {
733 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
734 return err;
735 }
736 return 0;
648} 737}
649 738
650static void parse_init(struct filter_parse_state *ps, 739static void parse_init(struct filter_parse_state *ps,
@@ -1003,12 +1092,14 @@ static int check_preds(struct filter_parse_state *ps)
1003static int replace_preds(struct event_subsystem *system, 1092static int replace_preds(struct event_subsystem *system,
1004 struct ftrace_event_call *call, 1093 struct ftrace_event_call *call,
1005 struct filter_parse_state *ps, 1094 struct filter_parse_state *ps,
1006 char *filter_string) 1095 char *filter_string,
1096 bool dry_run)
1007{ 1097{
1008 char *operand1 = NULL, *operand2 = NULL; 1098 char *operand1 = NULL, *operand2 = NULL;
1009 struct filter_pred *pred; 1099 struct filter_pred *pred;
1010 struct postfix_elt *elt; 1100 struct postfix_elt *elt;
1011 int err; 1101 int err;
1102 int n_preds = 0;
1012 1103
1013 err = check_preds(ps); 1104 err = check_preds(ps);
1014 if (err) 1105 if (err)
@@ -1027,19 +1118,14 @@ static int replace_preds(struct event_subsystem *system,
1027 continue; 1118 continue;
1028 } 1119 }
1029 1120
1121 if (n_preds++ == MAX_FILTER_PRED) {
1122 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1123 return -ENOSPC;
1124 }
1125
1030 if (elt->op == OP_AND || elt->op == OP_OR) { 1126 if (elt->op == OP_AND || elt->op == OP_OR) {
1031 pred = create_logical_pred(elt->op); 1127 pred = create_logical_pred(elt->op);
1032 if (call) { 1128 goto add_pred;
1033 err = filter_add_pred(ps, call, pred);
1034 filter_free_pred(pred);
1035 } else
1036 err = filter_add_subsystem_pred(ps, system,
1037 pred, filter_string);
1038 if (err)
1039 return err;
1040
1041 operand1 = operand2 = NULL;
1042 continue;
1043 } 1129 }
1044 1130
1045 if (!operand1 || !operand2) { 1131 if (!operand1 || !operand2) {
@@ -1048,12 +1134,15 @@ static int replace_preds(struct event_subsystem *system,
1048 } 1134 }
1049 1135
1050 pred = create_pred(elt->op, operand1, operand2); 1136 pred = create_pred(elt->op, operand1, operand2);
1051 if (call) { 1137add_pred:
1052 err = filter_add_pred(ps, call, pred); 1138 if (!pred)
1053 filter_free_pred(pred); 1139 return -ENOMEM;
1054 } else 1140 if (call)
1141 err = filter_add_pred(ps, call, pred, false);
1142 else
1055 err = filter_add_subsystem_pred(ps, system, pred, 1143 err = filter_add_subsystem_pred(ps, system, pred,
1056 filter_string); 1144 filter_string, dry_run);
1145 filter_free_pred(pred);
1057 if (err) 1146 if (err)
1058 return err; 1147 return err;
1059 1148
@@ -1071,6 +1160,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1071 1160
1072 mutex_lock(&event_mutex); 1161 mutex_lock(&event_mutex);
1073 1162
1163 err = init_preds(call);
1164 if (err)
1165 goto out_unlock;
1166
1074 if (!strcmp(strstrip(filter_string), "0")) { 1167 if (!strcmp(strstrip(filter_string), "0")) {
1075 filter_disable_preds(call); 1168 filter_disable_preds(call);
1076 remove_filter_string(call->filter); 1169 remove_filter_string(call->filter);
@@ -1093,7 +1186,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1093 goto out; 1186 goto out;
1094 } 1187 }
1095 1188
1096 err = replace_preds(NULL, call, ps, filter_string); 1189 err = replace_preds(NULL, call, ps, filter_string, false);
1097 if (err) 1190 if (err)
1098 append_filter_err(ps, call->filter); 1191 append_filter_err(ps, call->filter);
1099 1192
@@ -1116,8 +1209,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1116 1209
1117 mutex_lock(&event_mutex); 1210 mutex_lock(&event_mutex);
1118 1211
1212 err = init_subsystem_preds(system);
1213 if (err)
1214 goto out_unlock;
1215
1119 if (!strcmp(strstrip(filter_string), "0")) { 1216 if (!strcmp(strstrip(filter_string), "0")) {
1120 filter_free_subsystem_preds(system); 1217 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
1121 remove_filter_string(system->filter); 1218 remove_filter_string(system->filter);
1122 mutex_unlock(&event_mutex); 1219 mutex_unlock(&event_mutex);
1123 return 0; 1220 return 0;
@@ -1128,7 +1225,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1128 if (!ps) 1225 if (!ps)
1129 goto out_unlock; 1226 goto out_unlock;
1130 1227
1131 filter_free_subsystem_preds(system);
1132 replace_filter_string(system->filter, filter_string); 1228 replace_filter_string(system->filter, filter_string);
1133 1229
1134 parse_init(ps, filter_ops, filter_string); 1230 parse_init(ps, filter_ops, filter_string);
@@ -1138,9 +1234,23 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1138 goto out; 1234 goto out;
1139 } 1235 }
1140 1236
1141 err = replace_preds(system, NULL, ps, filter_string); 1237 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
1142 if (err) 1238
 1239 /* see which events the filter can be applied to */
1240 err = replace_preds(system, NULL, ps, filter_string, true);
1241 if (err) {
1143 append_filter_err(ps, system->filter); 1242 append_filter_err(ps, system->filter);
1243 goto out;
1244 }
1245
1246 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
1247
1248 /* really apply the filter to the events */
1249 err = replace_preds(system, NULL, ps, filter_string, false);
1250 if (err) {
1251 append_filter_err(ps, system->filter);
 1252 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
1253 }
1144 1254
1145out: 1255out:
1146 filter_opstack_clear(ps); 1256 filter_opstack_clear(ps);
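Condensed, the subsystem filter path above is now a two-pass operation; the outline below is a simplified paraphrase of the hunk, not a literal copy:

    /* pass 1: dry run - find which events in the subsystem accept the filter */
    err = replace_preds(system, NULL, ps, filter_string, true);
    if (err)
            goto out;               /* no event accepted it; nothing changed */

    /* clear the old filter on accepting events; rejecting events keep theirs */
    filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);

    /* pass 2: really install the new predicates on the accepting events */
    err = replace_preds(system, NULL, ps, filter_string, false);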
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d06cf898dc86..9753fcc61bc5 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -15,116 +15,209 @@
15 15
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM ftrace
18 20
19#undef TRACE_STRUCT 21/* not needed for this file */
20#define TRACE_STRUCT(args...) args 22#undef __field_struct
23#define __field_struct(type, item)
21 24
22extern void __bad_type_size(void); 25#undef __field
26#define __field(type, item) type item;
23 27
24#undef TRACE_FIELD 28#undef __field_desc
25#define TRACE_FIELD(type, item, assign) \ 29#define __field_desc(type, container, item) type item;
26 if (sizeof(type) != sizeof(field.item)) \ 30
27 __bad_type_size(); \ 31#undef __array
32#define __array(type, item, size) type item[size];
33
34#undef __array_desc
35#define __array_desc(type, container, item, size) type item[size];
36
37#undef __dynamic_array
38#define __dynamic_array(type, item) type item[];
39
40#undef F_STRUCT
41#define F_STRUCT(args...) args
42
43#undef F_printk
44#define F_printk(fmt, args...) fmt, args
45
46#undef FTRACE_ENTRY
47#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
48struct ____ftrace_##name { \
49 tstruct \
50}; \
51static void __used ____ftrace_check_##name(void) \
52{ \
53 struct ____ftrace_##name *__entry = NULL; \
54 \
 55 /* force compile-time check on F_printk() */ \
56 printk(print); \
57}
58
59#undef FTRACE_ENTRY_DUP
60#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
61 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
62
63#include "trace_entries.h"
64
65
66#undef __field
67#define __field(type, item) \
28 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
29 "offset:%u;\tsize:%u;\n", \ 69 "offset:%zu;\tsize:%zu;\n", \
30 (unsigned int)offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
31 (unsigned int)sizeof(field.item)); \ 71 sizeof(field.item)); \
32 if (!ret) \ 72 if (!ret) \
33 return 0; 73 return 0;
34 74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \
81 if (!ret) \
82 return 0;
83
84#undef __array
85#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \
88 offsetof(typeof(field), item), \
89 sizeof(field.item)); \
90 if (!ret) \
91 return 0;
35 92
36#undef TRACE_FIELD_SPECIAL 93#undef __array_desc
37#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ 94#define __array_desc(type, container, item, len) \
38 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ 95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
39 "offset:%u;\tsize:%u;\n", \ 96 "offset:%zu;\tsize:%zu;\n", \
40 (unsigned int)offsetof(typeof(field), item), \ 97 offsetof(typeof(field), container.item), \
41 (unsigned int)sizeof(field.item)); \ 98 sizeof(field.container.item)); \
42 if (!ret) \ 99 if (!ret) \
43 return 0; 100 return 0;
44 101
45#undef TRACE_FIELD_ZERO_CHAR 102#undef __dynamic_array
46#define TRACE_FIELD_ZERO_CHAR(item) \ 103#define __dynamic_array(type, item) \
47 ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ 104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
48 "offset:%u;\tsize:0;\n", \ 105 "offset:%zu;\tsize:0;\n", \
49 (unsigned int)offsetof(typeof(field), item)); \ 106 offsetof(typeof(field), item)); \
50 if (!ret) \ 107 if (!ret) \
51 return 0; 108 return 0;
52 109
53#undef TRACE_FIELD_SIGN 110#undef F_printk
54#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 111#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
55 TRACE_FIELD(type, item, assign)
56 112
57#undef TP_RAW_FMT 113#undef __entry
58#define TP_RAW_FMT(args...) args 114#define __entry REC
59 115
60#undef TRACE_EVENT_FORMAT 116#undef FTRACE_ENTRY
61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 117#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
62static int \ 118static int \
63ftrace_format_##call(struct trace_seq *s) \ 119ftrace_format_##name(struct ftrace_event_call *unused, \
120 struct trace_seq *s) \
64{ \ 121{ \
65 struct args field; \ 122 struct struct_name field __attribute__((unused)); \
66 int ret; \ 123 int ret = 0; \
67 \ 124 \
68 tstruct; \ 125 tstruct; \
69 \ 126 \
70 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ 127 trace_seq_printf(s, "\nprint fmt: " print); \
71 \ 128 \
72 return ret; \ 129 return ret; \
73} 130}
74 131
75#undef TRACE_EVENT_FORMAT_NOFILTER 132#include "trace_entries.h"
76#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 133
77 tpfmt) \ 134
78static int \ 135#undef __field
79ftrace_format_##call(struct trace_seq *s) \ 136#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \
138 offsetof(typeof(field), item), \
139 sizeof(field.item), \
140 is_signed_type(type), FILTER_OTHER); \
141 if (ret) \
142 return ret;
143
144#undef __field_desc
145#define __field_desc(type, container, item) \
146 ret = trace_define_field(event_call, #type, #item, \
147 offsetof(typeof(field), \
148 container.item), \
149 sizeof(field.container.item), \
150 is_signed_type(type), FILTER_OTHER); \
151 if (ret) \
152 return ret;
153
154#undef __array
155#define __array(type, item, len) \
156 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
157 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
158 offsetof(typeof(field), item), \
159 sizeof(field.item), 0, FILTER_OTHER); \
160 if (ret) \
161 return ret;
162
163#undef __array_desc
164#define __array_desc(type, container, item, len) \
165 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
166 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
167 offsetof(typeof(field), \
168 container.item), \
169 sizeof(field.container.item), 0, \
170 FILTER_OTHER); \
171 if (ret) \
172 return ret;
173
174#undef __dynamic_array
175#define __dynamic_array(type, item)
176
177#undef FTRACE_ENTRY
178#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
179int \
180ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
80{ \ 181{ \
81 struct args field; \ 182 struct struct_name field; \
82 int ret; \ 183 int ret; \
83 \ 184 \
84 tstruct; \ 185 ret = trace_define_common_fields(event_call); \
186 if (ret) \
187 return ret; \
85 \ 188 \
86 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ 189 tstruct; \
87 \ 190 \
88 return ret; \ 191 return ret; \
89} 192}
90 193
91#include "trace_event_types.h" 194#include "trace_entries.h"
92
93#undef TRACE_ZERO_CHAR
94#define TRACE_ZERO_CHAR(arg)
95 195
96#undef TRACE_FIELD
97#define TRACE_FIELD(type, item, assign)\
98 entry->item = assign;
99 196
100#undef TRACE_FIELD 197#undef __field
101#define TRACE_FIELD(type, item, assign)\ 198#define __field(type, item)
102 entry->item = assign;
103 199
104#undef TRACE_FIELD_SIGN 200#undef __field_desc
105#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 201#define __field_desc(type, container, item)
106 TRACE_FIELD(type, item, assign)
107 202
108#undef TP_CMD 203#undef __array
109#define TP_CMD(cmd...) cmd 204#define __array(type, item, len)
110 205
111#undef TRACE_ENTRY 206#undef __array_desc
112#define TRACE_ENTRY entry 207#define __array_desc(type, container, item, len)
113 208
114#undef TRACE_FIELD_SPECIAL 209#undef __dynamic_array
115#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ 210#define __dynamic_array(type, item)
116 cmd;
117 211
118#undef TRACE_EVENT_FORMAT 212#undef FTRACE_ENTRY
119#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
120int ftrace_define_fields_##call(void); \
121static int ftrace_raw_init_event_##call(void); \ 214static int ftrace_raw_init_event_##call(void); \
122 \ 215 \
123struct ftrace_event_call __used \ 216struct ftrace_event_call __used \
124__attribute__((__aligned__(4))) \ 217__attribute__((__aligned__(4))) \
125__attribute__((section("_ftrace_events"))) event_##call = { \ 218__attribute__((section("_ftrace_events"))) event_##call = { \
126 .name = #call, \ 219 .name = #call, \
127 .id = proto, \ 220 .id = type, \
128 .system = __stringify(TRACE_SYSTEM), \ 221 .system = __stringify(TRACE_SYSTEM), \
129 .raw_init = ftrace_raw_init_event_##call, \ 222 .raw_init = ftrace_raw_init_event_##call, \
130 .show_format = ftrace_format_##call, \ 223 .show_format = ftrace_format_##call, \
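The rewritten trace_export.c uses the usual ftrace macro trick: redefine __field()/__array()/F_printk() and re-include trace_entries.h, once per generated function (a compile-time checker, ftrace_format_##name() and ftrace_define_fields_##name()). As a rough illustration of what feeds those macros, a hypothetical entry (not one defined by this patch) would look like:

    /* hypothetical entry, for illustration only */
    FTRACE_ENTRY(example, example_entry, TRACE_EXAMPLE,

            F_STRUCT(
                    __field(unsigned long,  ip      )
                    __field(int,            value   )
            ),

            F_printk("ip=%lx value=%d", __entry->ip, __entry->value)
    )

With the format definitions of the macros in effect, re-including trace_entries.h turns this into an ftrace_format_example() that emits one field:/offset:/size: line per __field() plus the final print fmt: line.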
@@ -133,74 +226,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
133static int ftrace_raw_init_event_##call(void) \ 226static int ftrace_raw_init_event_##call(void) \
134{ \ 227{ \
135 INIT_LIST_HEAD(&event_##call.fields); \ 228 INIT_LIST_HEAD(&event_##call.fields); \
136 init_preds(&event_##call); \
137 return 0; \ 229 return 0; \
138} \ 230} \
139 231
140#undef TRACE_EVENT_FORMAT_NOFILTER 232#include "trace_entries.h"
141#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
142 tpfmt) \
143 \
144struct ftrace_event_call __used \
145__attribute__((__aligned__(4))) \
146__attribute__((section("_ftrace_events"))) event_##call = { \
147 .name = #call, \
148 .id = proto, \
149 .system = __stringify(TRACE_SYSTEM), \
150 .show_format = ftrace_format_##call, \
151};
152
153#include "trace_event_types.h"
154
155#undef TRACE_FIELD
156#define TRACE_FIELD(type, item, assign) \
157 ret = trace_define_field(event_call, #type, #item, \
158 offsetof(typeof(field), item), \
159 sizeof(field.item), is_signed_type(type)); \
160 if (ret) \
161 return ret;
162
163#undef TRACE_FIELD_SPECIAL
164#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
165 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
166 offsetof(typeof(field), item), \
167 sizeof(field.item), 0); \
168 if (ret) \
169 return ret;
170
171#undef TRACE_FIELD_SIGN
172#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
173 ret = trace_define_field(event_call, #type, #item, \
174 offsetof(typeof(field), item), \
175 sizeof(field.item), is_signed); \
176 if (ret) \
177 return ret;
178
179#undef TRACE_FIELD_ZERO_CHAR
180#define TRACE_FIELD_ZERO_CHAR(item)
181
182#undef TRACE_EVENT_FORMAT
183#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
184int \
185ftrace_define_fields_##call(void) \
186{ \
187 struct ftrace_event_call *event_call = &event_##call; \
188 struct args field; \
189 int ret; \
190 \
191 __common_field(unsigned char, type, 0); \
192 __common_field(unsigned char, flags, 0); \
193 __common_field(unsigned char, preempt_count, 0); \
194 __common_field(int, pid, 1); \
195 __common_field(int, tgid, 1); \
196 \
197 tstruct; \
198 \
199 return ret; \
200}
201
202#undef TRACE_EVENT_FORMAT_NOFILTER
203#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
204 tpfmt)
205
206#include "trace_event_types.h"
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 75ef000613c3..b3f3776b0cd6 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -288,11 +288,9 @@ static int
288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, 288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
289 struct ftrace_probe_ops *ops, void *data) 289 struct ftrace_probe_ops *ops, void *data)
290{ 290{
291 char str[KSYM_SYMBOL_LEN];
292 long count = (long)data; 291 long count = (long)data;
293 292
294 kallsyms_lookup(ip, NULL, NULL, NULL, str); 293 seq_printf(m, "%ps:", (void *)ip);
295 seq_printf(m, "%s:", str);
296 294
297 if (ops == &traceon_probe_ops) 295 if (ops == &traceon_probe_ops)
298 seq_printf(m, "traceon"); 296 seq_printf(m, "traceon");
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d2249abafb53..45e6c01b2e4d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -52,7 +52,7 @@ static struct tracer_flags tracer_flags = {
52 .opts = trace_opts 52 .opts = trace_opts
53}; 53};
54 54
55/* pid on the last trace processed */ 55static struct trace_array *graph_array;
56 56
57 57
58/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
@@ -124,7 +124,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) { 124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
125 ftrace_graph_stop(); 125 ftrace_graph_stop();
126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n" 126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
127 " from func %pF return to %lx\n", 127 " from func %ps return to %lx\n",
128 current->ret_stack[index].fp, 128 current->ret_stack[index].fp,
129 frame_pointer, 129 frame_pointer,
130 (void *)current->ret_stack[index].func, 130 (void *)current->ret_stack[index].func,
@@ -166,10 +166,123 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
166 return ret; 166 return ret;
167} 167}
168 168
169static int __trace_graph_entry(struct trace_array *tr,
170 struct ftrace_graph_ent *trace,
171 unsigned long flags,
172 int pc)
173{
174 struct ftrace_event_call *call = &event_funcgraph_entry;
175 struct ring_buffer_event *event;
176 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry;
178
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
180 return 0;
181
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
183 sizeof(*entry), flags, pc);
184 if (!event)
185 return 0;
186 entry = ring_buffer_event_data(event);
187 entry->graph_ent = *trace;
188 if (!filter_current_check_discard(buffer, call, entry, event))
189 ring_buffer_unlock_commit(buffer, event);
190
191 return 1;
192}
193
194int trace_graph_entry(struct ftrace_graph_ent *trace)
195{
196 struct trace_array *tr = graph_array;
197 struct trace_array_cpu *data;
198 unsigned long flags;
199 long disabled;
200 int ret;
201 int cpu;
202 int pc;
203
204 if (unlikely(!tr))
205 return 0;
206
207 if (!ftrace_trace_task(current))
208 return 0;
209
210 if (!ftrace_graph_addr(trace->func))
211 return 0;
212
213 local_irq_save(flags);
214 cpu = raw_smp_processor_id();
215 data = tr->data[cpu];
216 disabled = atomic_inc_return(&data->disabled);
217 if (likely(disabled == 1)) {
218 pc = preempt_count();
219 ret = __trace_graph_entry(tr, trace, flags, pc);
220 } else {
221 ret = 0;
222 }
223 /* Only do the atomic if it is not already set */
224 if (!test_tsk_trace_graph(current))
225 set_tsk_trace_graph(current);
226
227 atomic_dec(&data->disabled);
228 local_irq_restore(flags);
229
230 return ret;
231}
232
233static void __trace_graph_return(struct trace_array *tr,
234 struct ftrace_graph_ret *trace,
235 unsigned long flags,
236 int pc)
237{
238 struct ftrace_event_call *call = &event_funcgraph_exit;
239 struct ring_buffer_event *event;
240 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry;
242
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
244 return;
245
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
247 sizeof(*entry), flags, pc);
248 if (!event)
249 return;
250 entry = ring_buffer_event_data(event);
251 entry->ret = *trace;
252 if (!filter_current_check_discard(buffer, call, entry, event))
253 ring_buffer_unlock_commit(buffer, event);
254}
255
256void trace_graph_return(struct ftrace_graph_ret *trace)
257{
258 struct trace_array *tr = graph_array;
259 struct trace_array_cpu *data;
260 unsigned long flags;
261 long disabled;
262 int cpu;
263 int pc;
264
265 local_irq_save(flags);
266 cpu = raw_smp_processor_id();
267 data = tr->data[cpu];
268 disabled = atomic_inc_return(&data->disabled);
269 if (likely(disabled == 1)) {
270 pc = preempt_count();
271 __trace_graph_return(tr, trace, flags, pc);
272 }
273 if (!trace->depth)
274 clear_tsk_trace_graph(current);
275 atomic_dec(&data->disabled);
276 local_irq_restore(flags);
277}
278
169static int graph_trace_init(struct trace_array *tr) 279static int graph_trace_init(struct trace_array *tr)
170{ 280{
171 int ret = register_ftrace_graph(&trace_graph_return, 281 int ret;
172 &trace_graph_entry); 282
283 graph_array = tr;
284 ret = register_ftrace_graph(&trace_graph_return,
285 &trace_graph_entry);
173 if (ret) 286 if (ret)
174 return ret; 287 return ret;
175 tracing_start_cmdline_record(); 288 tracing_start_cmdline_record();
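trace_graph_entry() and trace_graph_return() above share a re-entrancy guard: a per-cpu disabled counter that lets only the first nesting level on a CPU write to the ring buffer. Stripped of the tracing details, the guard looks roughly like this (record_the_event() is a stand-in, not a real function):

    unsigned long flags;
    long disabled;

    local_irq_save(flags);
    disabled = atomic_inc_return(&data->disabled);   /* per-cpu nesting count */
    if (likely(disabled == 1)) {
            /* first, non-nested entry on this CPU: safe to record */
            record_the_event();
    }
    atomic_dec(&data->disabled);
    local_irq_restore(flags);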
@@ -177,49 +290,30 @@ static int graph_trace_init(struct trace_array *tr)
177 return 0; 290 return 0;
178} 291}
179 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296}
297
180static void graph_trace_reset(struct trace_array *tr) 298static void graph_trace_reset(struct trace_array *tr)
181{ 299{
182 tracing_stop_cmdline_record(); 300 tracing_stop_cmdline_record();
183 unregister_ftrace_graph(); 301 unregister_ftrace_graph();
184} 302}
185 303
186static inline int log10_cpu(int nb) 304static int max_bytes_for_cpu;
187{
188 if (nb / 100)
189 return 3;
190 if (nb / 10)
191 return 2;
192 return 1;
193}
194 305
195static enum print_line_t 306static enum print_line_t
196print_graph_cpu(struct trace_seq *s, int cpu) 307print_graph_cpu(struct trace_seq *s, int cpu)
197{ 308{
198 int i;
199 int ret; 309 int ret;
200 int log10_this = log10_cpu(cpu);
201 int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
202
203 310
204 /* 311 /*
205 * Start with a space character - to make it stand out 312 * Start with a space character - to make it stand out
206 * to the right a bit when trace output is pasted into 313 * to the right a bit when trace output is pasted into
207 * email: 314 * email:
208 */ 315 */
209 ret = trace_seq_printf(s, " "); 316 ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
210
211 /*
212 * Tricky - we space the CPU field according to the max
213 * number of online CPUs. On a 2-cpu system it would take
214 * a maximum of 1 digit - on a 128 cpu system it would
215 * take up to 3 digits:
216 */
217 for (i = 0; i < log10_all - log10_this; i++) {
218 ret = trace_seq_printf(s, " ");
219 if (!ret)
220 return TRACE_TYPE_PARTIAL_LINE;
221 }
222 ret = trace_seq_printf(s, "%d) ", cpu);
223 if (!ret) 317 if (!ret)
224 return TRACE_TYPE_PARTIAL_LINE; 318 return TRACE_TYPE_PARTIAL_LINE;
225 319
@@ -270,6 +364,15 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
270} 364}
271 365
272 366
367static enum print_line_t
368print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
369{
370 if (!trace_seq_putc(s, ' '))
371 return 0;
372
373 return trace_print_lat_fmt(s, entry);
374}
375
273/* If the pid changed since the last trace, output this event */ 376/* If the pid changed since the last trace, output this event */
274static enum print_line_t 377static enum print_line_t
275verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) 378verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
@@ -427,6 +530,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
427 if (ret == TRACE_TYPE_PARTIAL_LINE) 530 if (ret == TRACE_TYPE_PARTIAL_LINE)
428 return TRACE_TYPE_PARTIAL_LINE; 531 return TRACE_TYPE_PARTIAL_LINE;
429 } 532 }
533
430 /* Proc */ 534 /* Proc */
431 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 535 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
432 ret = print_graph_proc(s, pid); 536 ret = print_graph_proc(s, pid);
@@ -565,11 +669,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
565 return TRACE_TYPE_PARTIAL_LINE; 669 return TRACE_TYPE_PARTIAL_LINE;
566 } 670 }
567 671
568 ret = seq_print_ip_sym(s, call->func, 0); 672 ret = trace_seq_printf(s, "%ps();\n", (void *)call->func);
569 if (!ret)
570 return TRACE_TYPE_PARTIAL_LINE;
571
572 ret = trace_seq_printf(s, "();\n");
573 if (!ret) 673 if (!ret)
574 return TRACE_TYPE_PARTIAL_LINE; 674 return TRACE_TYPE_PARTIAL_LINE;
575 675
@@ -612,11 +712,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
612 return TRACE_TYPE_PARTIAL_LINE; 712 return TRACE_TYPE_PARTIAL_LINE;
613 } 713 }
614 714
615 ret = seq_print_ip_sym(s, call->func, 0); 715 ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func);
616 if (!ret)
617 return TRACE_TYPE_PARTIAL_LINE;
618
619 ret = trace_seq_printf(s, "() {\n");
620 if (!ret) 716 if (!ret)
621 return TRACE_TYPE_PARTIAL_LINE; 717 return TRACE_TYPE_PARTIAL_LINE;
622 718
@@ -672,6 +768,13 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
672 return TRACE_TYPE_PARTIAL_LINE; 768 return TRACE_TYPE_PARTIAL_LINE;
673 } 769 }
674 770
771 /* Latency format */
772 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
773 ret = print_graph_lat_fmt(s, ent);
774 if (ret == TRACE_TYPE_PARTIAL_LINE)
775 return TRACE_TYPE_PARTIAL_LINE;
776 }
777
675 return 0; 778 return 0;
676} 779}
677 780
@@ -843,9 +946,16 @@ print_graph_function(struct trace_iterator *iter)
843 946
844 switch (entry->type) { 947 switch (entry->type) {
845 case TRACE_GRAPH_ENT: { 948 case TRACE_GRAPH_ENT: {
846 struct ftrace_graph_ent_entry *field; 949 /*
950 * print_graph_entry() may consume the current event,
951 * thus @field may become invalid, so we need to save it.
952 * sizeof(struct ftrace_graph_ent_entry) is very small,
 953 * so it can safely be saved on the stack.
954 */
955 struct ftrace_graph_ent_entry *field, saved;
847 trace_assign_type(field, entry); 956 trace_assign_type(field, entry);
848 return print_graph_entry(field, s, iter); 957 saved = *field;
958 return print_graph_entry(&saved, s, iter);
849 } 959 }
850 case TRACE_GRAPH_RET: { 960 case TRACE_GRAPH_RET: {
851 struct ftrace_graph_ret_entry *field; 961 struct ftrace_graph_ret_entry *field;
@@ -859,28 +969,59 @@ print_graph_function(struct trace_iterator *iter)
859 return TRACE_TYPE_HANDLED; 969 return TRACE_TYPE_HANDLED;
860} 970}
861 971
972static void print_lat_header(struct seq_file *s)
973{
974 static const char spaces[] = " " /* 16 spaces */
975 " " /* 4 spaces */
976 " "; /* 17 spaces */
977 int size = 0;
978
979 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
980 size += 16;
981 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
982 size += 4;
983 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
984 size += 17;
985
986 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces);
987 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces);
988 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
989 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces);
990 seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces);
991 seq_printf(s, "#%.*s|||| / \n", size, spaces);
992}
993
862static void print_graph_headers(struct seq_file *s) 994static void print_graph_headers(struct seq_file *s)
863{ 995{
996 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
997
998 if (lat)
999 print_lat_header(s);
1000
864 /* 1st line */ 1001 /* 1st line */
865 seq_printf(s, "# "); 1002 seq_printf(s, "#");
866 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1003 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
867 seq_printf(s, " TIME "); 1004 seq_printf(s, " TIME ");
868 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1005 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
869 seq_printf(s, "CPU"); 1006 seq_printf(s, " CPU");
870 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1007 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
871 seq_printf(s, " TASK/PID "); 1008 seq_printf(s, " TASK/PID ");
1009 if (lat)
1010 seq_printf(s, "|||||");
872 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1011 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
873 seq_printf(s, " DURATION "); 1012 seq_printf(s, " DURATION ");
874 seq_printf(s, " FUNCTION CALLS\n"); 1013 seq_printf(s, " FUNCTION CALLS\n");
875 1014
876 /* 2nd line */ 1015 /* 2nd line */
877 seq_printf(s, "# "); 1016 seq_printf(s, "#");
878 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1017 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
879 seq_printf(s, " | "); 1018 seq_printf(s, " | ");
880 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1019 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
881 seq_printf(s, "| "); 1020 seq_printf(s, " | ");
882 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1021 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
883 seq_printf(s, " | | "); 1022 seq_printf(s, " | | ");
1023 if (lat)
1024 seq_printf(s, "|||||");
884 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1025 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
885 seq_printf(s, " | | "); 1026 seq_printf(s, " | | ");
886 seq_printf(s, " | | | |\n"); 1027 seq_printf(s, " | | | |\n");
@@ -927,6 +1068,8 @@ static struct tracer graph_trace __read_mostly = {
927 1068
928static __init int init_graph_trace(void) 1069static __init int init_graph_trace(void)
929{ 1070{
1071 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1072
930 return register_tracer(&graph_trace); 1073 return register_tracer(&graph_trace);
931} 1074}
932 1075
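The graph-tracer hunks above drop the per-line log10 padding in favour of computing max_bytes_for_cpu once with snprintf(NULL, 0, ...), and print_lat_header() pads its banner with printf's "%.*s" precision. A minimal, runnable userspace sketch of both idioms; nr_cpu_ids is kernel-only, so a stand-in value is used here.

#include <stdio.h>

int main(void)
{
	int nr_cpu_ids = 128;	/* stand-in for the kernel's nr_cpu_ids */
	static const char spaces[] = "                                        "; /* 40 blanks */

	/*
	 * snprintf(NULL, 0, ...) writes nothing and returns the number of
	 * characters the output would need, i.e. the digit count of the
	 * largest possible CPU number (here: 3, for "127").
	 */
	int max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);

	/*
	 * "%.*s" prints at most 'size' characters of 'spaces', so a single
	 * wide blank buffer pads a header to any width chosen at run time.
	 */
	int size = 16 + 4;	/* e.g. the ABS_TIME and CPU columns */

	printf("max_bytes_for_cpu = %d\n", max_bytes_for_cpu);
	printf("#%.*s _-----=> irqs-off\n", size, spaces);
	return 0;
}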
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index ca7d7c4d0c2a..23b63859130e 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -155,7 +155,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
155 seq_print_ip_sym(seq, it->from, symflags) && 155 seq_print_ip_sym(seq, it->from, symflags) &&
156 trace_seq_printf(seq, "\n")) 156 trace_seq_printf(seq, "\n"))
157 return TRACE_TYPE_HANDLED; 157 return TRACE_TYPE_HANDLED;
158 return TRACE_TYPE_PARTIAL_LINE;; 158 return TRACE_TYPE_PARTIAL_LINE;
159 } 159 }
160 return TRACE_TYPE_UNHANDLED; 160 return TRACE_TYPE_UNHANDLED;
161} 161}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b923d13e2fad..3aa7eaa2114c 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -129,15 +129,10 @@ check_critical_timing(struct trace_array *tr,
129 unsigned long parent_ip, 129 unsigned long parent_ip,
130 int cpu) 130 int cpu)
131{ 131{
132 unsigned long latency, t0, t1;
133 cycle_t T0, T1, delta; 132 cycle_t T0, T1, delta;
134 unsigned long flags; 133 unsigned long flags;
135 int pc; 134 int pc;
136 135
137 /*
138 * usecs conversion is slow so we try to delay the conversion
139 * as long as possible:
140 */
141 T0 = data->preempt_timestamp; 136 T0 = data->preempt_timestamp;
142 T1 = ftrace_now(cpu); 137 T1 = ftrace_now(cpu);
143 delta = T1-T0; 138 delta = T1-T0;
@@ -157,18 +152,15 @@ check_critical_timing(struct trace_array *tr,
157 152
158 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
159 154
160 latency = nsecs_to_usecs(delta);
161
162 if (data->critical_sequence != max_sequence) 155 if (data->critical_sequence != max_sequence)
163 goto out_unlock; 156 goto out_unlock;
164 157
165 tracing_max_latency = delta;
166 t0 = nsecs_to_usecs(T0);
167 t1 = nsecs_to_usecs(T1);
168
169 data->critical_end = parent_ip; 158 data->critical_end = parent_ip;
170 159
171 update_max_tr_single(tr, current, cpu); 160 if (likely(!is_tracing_stopped())) {
161 tracing_max_latency = delta;
162 update_max_tr_single(tr, current, cpu);
163 }
172 164
173 max_sequence++; 165 max_sequence++;
174 166
@@ -178,7 +170,6 @@ out_unlock:
178out: 170out:
179 data->critical_sequence = max_sequence; 171 data->critical_sequence = max_sequence;
180 data->preempt_timestamp = ftrace_now(cpu); 172 data->preempt_timestamp = ftrace_now(cpu);
181 tracing_reset(tr, cpu);
182 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 173 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
183} 174}
184 175
@@ -208,7 +199,6 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
208 data->critical_sequence = max_sequence; 199 data->critical_sequence = max_sequence;
209 data->preempt_timestamp = ftrace_now(cpu); 200 data->preempt_timestamp = ftrace_now(cpu);
210 data->critical_start = parent_ip ? : ip; 201 data->critical_start = parent_ip ? : ip;
211 tracing_reset(tr, cpu);
212 202
213 local_save_flags(flags); 203 local_save_flags(flags);
214 204
@@ -379,6 +369,7 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
379 irqsoff_trace = tr; 369 irqsoff_trace = tr;
380 /* make sure that the tracer is visible */ 370 /* make sure that the tracer is visible */
381 smp_wmb(); 371 smp_wmb();
372 tracing_reset_online_cpus(tr);
382 start_irqsoff_tracer(tr); 373 start_irqsoff_tracer(tr);
383} 374}
384 375
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index d53b45ed0806..0acd834659ed 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -307,11 +307,13 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
307 struct trace_array_cpu *data, 307 struct trace_array_cpu *data,
308 struct mmiotrace_rw *rw) 308 struct mmiotrace_rw *rw)
309{ 309{
310 struct ftrace_event_call *call = &event_mmiotrace_rw;
311 struct ring_buffer *buffer = tr->buffer;
310 struct ring_buffer_event *event; 312 struct ring_buffer_event *event;
311 struct trace_mmiotrace_rw *entry; 313 struct trace_mmiotrace_rw *entry;
312 int pc = preempt_count(); 314 int pc = preempt_count();
313 315
314 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, 316 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
315 sizeof(*entry), 0, pc); 317 sizeof(*entry), 0, pc);
316 if (!event) { 318 if (!event) {
317 atomic_inc(&dropped_count); 319 atomic_inc(&dropped_count);
@@ -319,7 +321,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
319 } 321 }
320 entry = ring_buffer_event_data(event); 322 entry = ring_buffer_event_data(event);
321 entry->rw = *rw; 323 entry->rw = *rw;
322 trace_buffer_unlock_commit(tr, event, 0, pc); 324
325 if (!filter_check_discard(call, entry, buffer, event))
326 trace_buffer_unlock_commit(buffer, event, 0, pc);
323} 327}
324 328
325void mmio_trace_rw(struct mmiotrace_rw *rw) 329void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -333,11 +337,13 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
333 struct trace_array_cpu *data, 337 struct trace_array_cpu *data,
334 struct mmiotrace_map *map) 338 struct mmiotrace_map *map)
335{ 339{
340 struct ftrace_event_call *call = &event_mmiotrace_map;
341 struct ring_buffer *buffer = tr->buffer;
336 struct ring_buffer_event *event; 342 struct ring_buffer_event *event;
337 struct trace_mmiotrace_map *entry; 343 struct trace_mmiotrace_map *entry;
338 int pc = preempt_count(); 344 int pc = preempt_count();
339 345
340 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, 346 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP,
341 sizeof(*entry), 0, pc); 347 sizeof(*entry), 0, pc);
342 if (!event) { 348 if (!event) {
343 atomic_inc(&dropped_count); 349 atomic_inc(&dropped_count);
@@ -345,7 +351,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
345 } 351 }
346 entry = ring_buffer_event_data(event); 352 entry = ring_buffer_event_data(event);
347 entry->map = *map; 353 entry->map = *map;
348 trace_buffer_unlock_commit(tr, event, 0, pc); 354
355 if (!filter_check_discard(call, entry, buffer, event))
356 trace_buffer_unlock_commit(buffer, event, 0, pc);
349} 357}
350 358
351void mmio_trace_mapping(struct mmiotrace_map *map) 359void mmio_trace_mapping(struct mmiotrace_map *map)
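The mmiotrace hunks above move these helpers onto the commit pattern used throughout this series: reserve the event on tr->buffer, fill it in, and commit only when filter_check_discard() has not already dropped it. A condensed sketch of that pattern, restricted to the calls visible in the hunks (kernel-only, not compilable on its own; event_mmiotrace_rw and dropped_count come from the surrounding file):

static void __trace_mmiotrace_rw_sketch(struct trace_array *tr,
					struct mmiotrace_rw *rw)
{
	struct ftrace_event_call *call = &event_mmiotrace_rw;
	struct ring_buffer *buffer = tr->buffer;	/* commits now target the buffer, not tr */
	struct ring_buffer_event *event;
	struct trace_mmiotrace_rw *entry;
	int pc = preempt_count();

	/* 1. reserve space for the entry */
	event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
					  sizeof(*entry), 0, pc);
	if (!event) {
		atomic_inc(&dropped_count);
		return;
	}

	/* 2. fill it in */
	entry = ring_buffer_event_data(event);
	entry->rw = *rw;

	/* 3. commit unless the event filter discarded it */
	if (!filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit(buffer, event, 0, pc);
}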
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e0c2545622e8..f572f44c6e1e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -407,7 +407,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
407 * since individual threads might have already quit! 407 * since individual threads might have already quit!
408 */ 408 */
409 rcu_read_lock(); 409 rcu_read_lock();
410 task = find_task_by_vpid(entry->ent.tgid); 410 task = find_task_by_vpid(entry->tgid);
411 if (task) 411 if (task)
412 mm = get_task_mm(task); 412 mm = get_task_mm(task);
413 rcu_read_unlock(); 413 rcu_read_unlock();
@@ -460,18 +460,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
460 return ret; 460 return ret;
461} 461}
462 462
463static int 463/**
464lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) 464 * trace_print_lat_fmt - print the irq, preempt and lockdep fields
465 * @s: trace seq struct to write to
466 * @entry: The trace entry field from the ring buffer
467 *
468 * Prints the generic fields of irqs off, in hard or softirq, preempt
469 * count and lock depth.
470 */
471int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
465{ 472{
466 int hardirq, softirq; 473 int hardirq, softirq;
467 char comm[TASK_COMM_LEN]; 474 int ret;
468 475
469 trace_find_cmdline(entry->pid, comm);
470 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 476 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
471 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 477 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
472 478
473 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", 479 if (!trace_seq_printf(s, "%c%c%c",
474 comm, entry->pid, cpu,
475 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : 480 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
476 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 481 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
477 'X' : '.', 482 'X' : '.',
@@ -481,9 +486,30 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
481 hardirq ? 'h' : softirq ? 's' : '.')) 486 hardirq ? 'h' : softirq ? 's' : '.'))
482 return 0; 487 return 0;
483 488
489 if (entry->lock_depth < 0)
490 ret = trace_seq_putc(s, '.');
491 else
492 ret = trace_seq_printf(s, "%d", entry->lock_depth);
493 if (!ret)
494 return 0;
495
484 if (entry->preempt_count) 496 if (entry->preempt_count)
485 return trace_seq_printf(s, "%x", entry->preempt_count); 497 return trace_seq_printf(s, "%x", entry->preempt_count);
486 return trace_seq_puts(s, "."); 498 return trace_seq_putc(s, '.');
499}
500
501static int
502lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
503{
504 char comm[TASK_COMM_LEN];
505
506 trace_find_cmdline(entry->pid, comm);
507
508 if (!trace_seq_printf(s, "%8.8s-%-5d %3d",
509 comm, entry->pid, cpu))
510 return 0;
511
512 return trace_print_lat_fmt(s, entry);
487} 513}
488 514
489static unsigned long preempt_mark_thresh = 100; 515static unsigned long preempt_mark_thresh = 100;
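trace_print_lat_fmt() now carries just the per-entry latency columns, with lat_print_generic() keeping the comm/pid/cpu prefix, which is what lets the graph tracer reuse it through print_graph_lat_fmt(). A rough, runnable sketch of the columns visible in this hunk (stand-in flag bits; the need-resched column lives in the part of the printf elided from the diff and is omitted here):

#include <stdio.h>

/* stand-in flag bits; the kernel's TRACE_FLAG_* values differ */
#define F_IRQS_OFF		0x01
#define F_IRQS_NOSUPPORT	0x02
#define F_HARDIRQ		0x04
#define F_SOFTIRQ		0x08

static void print_lat_cols(unsigned int flags, int preempt_count, int lock_depth)
{
	int hardirq = flags & F_HARDIRQ;
	int softirq = flags & F_SOFTIRQ;

	/* irqs-off column: 'd'isabled, 'X' unsupported, '.' enabled */
	putchar((flags & F_IRQS_OFF) ? 'd' :
		(flags & F_IRQS_NOSUPPORT) ? 'X' : '.');

	/* hardirq/softirq column */
	putchar(hardirq ? 'h' : softirq ? 's' : '.');

	/* lock-depth column: '.' when no lock is held (depth < 0) */
	if (lock_depth < 0)
		putchar('.');
	else
		printf("%d", lock_depth);

	/* preempt-depth column, printed in hex as in the kernel code */
	if (preempt_count)
		printf("%x", preempt_count);
	else
		putchar('.');
	putchar('\n');
}

int main(void)
{
	print_lat_cols(F_IRQS_OFF | F_HARDIRQ, 1, -1);	/* prints "dh.1" */
	print_lat_cols(0, 0, 0);			/* prints "..0." */
	return 0;
}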
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index d38bec4a9c30..9d91c72ba38b 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -26,6 +26,8 @@ extern struct trace_event *ftrace_find_event(int type);
26 26
27extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
28 int flags); 28 int flags);
29extern int
30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
29 31
30/* used by module unregistering */ 32/* used by module unregistering */
31extern int __unregister_ftrace_event(struct trace_event *event); 33extern int __unregister_ftrace_event(struct trace_event *event);
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
deleted file mode 100644
index 8a30d9874cd4..000000000000
--- a/kernel/trace/trace_power.c
+++ /dev/null
@@ -1,214 +0,0 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <trace/power.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19#include "trace_output.h"
20
21static struct trace_array *power_trace;
22static int __read_mostly trace_power_enabled;
23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ftrace_event_call *call = &event_power;
40 struct ring_buffer_event *event;
41 struct trace_power *entry;
42 struct trace_array_cpu *data;
43 struct trace_array *tr = power_trace;
44
45 if (!trace_power_enabled)
46 return;
47
48 preempt_disable();
49 it->end = ktime_get();
50 data = tr->data[smp_processor_id()];
51
52 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
53 sizeof(*entry), 0, 0);
54 if (!event)
55 goto out;
56 entry = ring_buffer_event_data(event);
57 entry->state_data = *it;
58 if (!filter_check_discard(call, entry, tr->buffer, event))
59 trace_buffer_unlock_commit(tr, event, 0, 0);
60 out:
61 preempt_enable();
62}
63
64static void probe_power_mark(struct power_trace *it, unsigned int type,
65 unsigned int level)
66{
67 struct ftrace_event_call *call = &event_power;
68 struct ring_buffer_event *event;
69 struct trace_power *entry;
70 struct trace_array_cpu *data;
71 struct trace_array *tr = power_trace;
72
73 if (!trace_power_enabled)
74 return;
75
76 memset(it, 0, sizeof(struct power_trace));
77 it->state = level;
78 it->type = type;
79 it->stamp = ktime_get();
80 preempt_disable();
81 it->end = it->stamp;
82 data = tr->data[smp_processor_id()];
83
84 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
85 sizeof(*entry), 0, 0);
86 if (!event)
87 goto out;
88 entry = ring_buffer_event_data(event);
89 entry->state_data = *it;
90 if (!filter_check_discard(call, entry, tr->buffer, event))
91 trace_buffer_unlock_commit(tr, event, 0, 0);
92 out:
93 preempt_enable();
94}
95
96static int tracing_power_register(void)
97{
98 int ret;
99
100 ret = register_trace_power_start(probe_power_start);
101 if (ret) {
102 pr_info("power trace: Couldn't activate tracepoint"
103 " probe to trace_power_start\n");
104 return ret;
105 }
106 ret = register_trace_power_end(probe_power_end);
107 if (ret) {
108 pr_info("power trace: Couldn't activate tracepoint"
109 " probe to trace_power_end\n");
110 goto fail_start;
111 }
112 ret = register_trace_power_mark(probe_power_mark);
113 if (ret) {
114 pr_info("power trace: Couldn't activate tracepoint"
115 " probe to trace_power_mark\n");
116 goto fail_end;
117 }
118 return ret;
119fail_end:
120 unregister_trace_power_end(probe_power_end);
121fail_start:
122 unregister_trace_power_start(probe_power_start);
123 return ret;
124}
125
126static void start_power_trace(struct trace_array *tr)
127{
128 trace_power_enabled = 1;
129}
130
131static void stop_power_trace(struct trace_array *tr)
132{
133 trace_power_enabled = 0;
134}
135
136static void power_trace_reset(struct trace_array *tr)
137{
138 trace_power_enabled = 0;
139 unregister_trace_power_start(probe_power_start);
140 unregister_trace_power_end(probe_power_end);
141 unregister_trace_power_mark(probe_power_mark);
142}
143
144
145static int power_trace_init(struct trace_array *tr)
146{
147 int cpu;
148 power_trace = tr;
149
150 trace_power_enabled = 1;
151 tracing_power_register();
152
153 for_each_cpu(cpu, cpu_possible_mask)
154 tracing_reset(tr, cpu);
155 return 0;
156}
157
158static enum print_line_t power_print_line(struct trace_iterator *iter)
159{
160 int ret = 0;
161 struct trace_entry *entry = iter->ent;
162 struct trace_power *field ;
163 struct power_trace *it;
164 struct trace_seq *s = &iter->seq;
165 struct timespec stamp;
166 struct timespec duration;
167
168 trace_assign_type(field, entry);
169 it = &field->state_data;
170 stamp = ktime_to_timespec(it->stamp);
171 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
172
173 if (entry->type == TRACE_POWER) {
174 if (it->type == POWER_CSTATE)
175 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
176 stamp.tv_sec,
177 stamp.tv_nsec,
178 it->state, iter->cpu,
179 duration.tv_sec,
180 duration.tv_nsec);
181 if (it->type == POWER_PSTATE)
182 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
183 stamp.tv_sec,
184 stamp.tv_nsec,
185 it->state, iter->cpu);
186 if (!ret)
187 return TRACE_TYPE_PARTIAL_LINE;
188 return TRACE_TYPE_HANDLED;
189 }
190 return TRACE_TYPE_UNHANDLED;
191}
192
193static void power_print_header(struct seq_file *s)
194{
195 seq_puts(s, "# TIMESTAMP STATE EVENT\n");
196 seq_puts(s, "# | | |\n");
197}
198
199static struct tracer power_tracer __read_mostly =
200{
201 .name = "power",
202 .init = power_trace_init,
203 .start = start_power_trace,
204 .stop = stop_power_trace,
205 .reset = power_trace_reset,
206 .print_line = power_print_line,
207 .print_header = power_print_header,
208};
209
210static int init_power_trace(void)
211{
212 return register_tracer(&power_tracer);
213}
214device_initcall(init_power_trace);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 7b6278110827..2547d8813cf0 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -11,7 +11,6 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/marker.h>
15#include <linux/mutex.h> 14#include <linux/mutex.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/list.h> 16#include <linux/list.h>
@@ -176,7 +175,7 @@ static int t_show(struct seq_file *m, void *v)
176 const char *str = *fmt; 175 const char *str = *fmt;
177 int i; 176 int i;
178 177
179 seq_printf(m, "0x%lx : \"", (unsigned long)fmt); 178 seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
180 179
181 /* 180 /*
182 * Tabs and new lines need to be converted. 181 * Tabs and new lines need to be converted.
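The one-character trace_printk fix above is easy to misread: fmt here walks a table of format-string pointers, so casting fmt itself printed the address of the table slot, while the intent was the address of the format string that slot points to. A small runnable illustration of the difference (the table below is made up for the example; the cast mirrors the kernel's own type-punning and assumes pointers and longs have equal size, as on Linux):

#include <stdio.h>

int main(void)
{
	static const char *formats[] = { "hello %d", "bye %s" };
	const char **fmt = &formats[0];		/* what the t_show() loop iterates over */

	/* old behaviour: address of the table slot itself */
	printf("slot address:   0x%lx\n", (unsigned long)fmt);

	/* fixed behaviour: address of the string the slot points to */
	printf("format address: 0x%lx : \"%s\"\n", *(unsigned long *)fmt, *fmt);
	return 0;
}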
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a98106dd979c..5fca0f51fde4 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -20,6 +20,35 @@ static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped; 21static int sched_stopped;
22 22
23
24void
25tracing_sched_switch_trace(struct trace_array *tr,
26 struct task_struct *prev,
27 struct task_struct *next,
28 unsigned long flags, int pc)
29{
30 struct ftrace_event_call *call = &event_context_switch;
31 struct ring_buffer *buffer = tr->buffer;
32 struct ring_buffer_event *event;
33 struct ctx_switch_entry *entry;
34
35 event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
36 sizeof(*entry), flags, pc);
37 if (!event)
38 return;
39 entry = ring_buffer_event_data(event);
40 entry->prev_pid = prev->pid;
41 entry->prev_prio = prev->prio;
42 entry->prev_state = prev->state;
43 entry->next_pid = next->pid;
44 entry->next_prio = next->prio;
45 entry->next_state = next->state;
46 entry->next_cpu = task_cpu(next);
47
48 if (!filter_check_discard(call, entry, buffer, event))
49 trace_buffer_unlock_commit(buffer, event, flags, pc);
50}
51
23static void 52static void
24probe_sched_switch(struct rq *__rq, struct task_struct *prev, 53probe_sched_switch(struct rq *__rq, struct task_struct *prev,
25 struct task_struct *next) 54 struct task_struct *next)
@@ -49,6 +78,36 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
49 local_irq_restore(flags); 78 local_irq_restore(flags);
50} 79}
51 80
81void
82tracing_sched_wakeup_trace(struct trace_array *tr,
83 struct task_struct *wakee,
84 struct task_struct *curr,
85 unsigned long flags, int pc)
86{
87 struct ftrace_event_call *call = &event_wakeup;
88 struct ring_buffer_event *event;
89 struct ctx_switch_entry *entry;
90 struct ring_buffer *buffer = tr->buffer;
91
92 event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
93 sizeof(*entry), flags, pc);
94 if (!event)
95 return;
96 entry = ring_buffer_event_data(event);
97 entry->prev_pid = curr->pid;
98 entry->prev_prio = curr->prio;
99 entry->prev_state = curr->state;
100 entry->next_pid = wakee->pid;
101 entry->next_prio = wakee->prio;
102 entry->next_state = wakee->state;
103 entry->next_cpu = task_cpu(wakee);
104
105 if (!filter_check_discard(call, entry, buffer, event))
106 ring_buffer_unlock_commit(buffer, event);
107 ftrace_trace_stack(tr->buffer, flags, 6, pc);
108 ftrace_trace_userstack(tr->buffer, flags, pc);
109}
110
52static void 111static void
53probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 112probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
54{ 113{
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index eacb27225173..26185d727676 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -24,6 +24,7 @@ static int __read_mostly tracer_enabled;
24 24
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static int wakeup_current_cpu;
27static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
28static int wakeup_rt; 29static int wakeup_rt;
29 30
@@ -56,33 +57,23 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
56 resched = ftrace_preempt_disable(); 57 resched = ftrace_preempt_disable();
57 58
58 cpu = raw_smp_processor_id(); 59 cpu = raw_smp_processor_id();
60 if (cpu != wakeup_current_cpu)
61 goto out_enable;
62
59 data = tr->data[cpu]; 63 data = tr->data[cpu];
60 disabled = atomic_inc_return(&data->disabled); 64 disabled = atomic_inc_return(&data->disabled);
61 if (unlikely(disabled != 1)) 65 if (unlikely(disabled != 1))
62 goto out; 66 goto out;
63 67
64 local_irq_save(flags); 68 local_irq_save(flags);
65 __raw_spin_lock(&wakeup_lock);
66
67 if (unlikely(!wakeup_task))
68 goto unlock;
69
70 /*
71 * The task can't disappear because it needs to
72 * wake up first, and we have the wakeup_lock.
73 */
74 if (task_cpu(wakeup_task) != cpu)
75 goto unlock;
76 69
77 trace_function(tr, ip, parent_ip, flags, pc); 70 trace_function(tr, ip, parent_ip, flags, pc);
78 71
79 unlock:
80 __raw_spin_unlock(&wakeup_lock);
81 local_irq_restore(flags); 72 local_irq_restore(flags);
82 73
83 out: 74 out:
84 atomic_dec(&data->disabled); 75 atomic_dec(&data->disabled);
85 76 out_enable:
86 ftrace_preempt_enable(resched); 77 ftrace_preempt_enable(resched);
87} 78}
88 79
@@ -107,11 +98,18 @@ static int report_latency(cycle_t delta)
107 return 1; 98 return 1;
108} 99}
109 100
101static void probe_wakeup_migrate_task(struct task_struct *task, int cpu)
102{
103 if (task != wakeup_task)
104 return;
105
106 wakeup_current_cpu = cpu;
107}
108
110static void notrace 109static void notrace
111probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, 110probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
112 struct task_struct *next) 111 struct task_struct *next)
113{ 112{
114 unsigned long latency = 0, t0 = 0, t1 = 0;
115 struct trace_array_cpu *data; 113 struct trace_array_cpu *data;
116 cycle_t T0, T1, delta; 114 cycle_t T0, T1, delta;
117 unsigned long flags; 115 unsigned long flags;
@@ -157,10 +155,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 157
160 /*
161 * usecs conversion is slow so we try to delay the conversion
162 * as long as possible:
163 */
164 T0 = data->preempt_timestamp; 158 T0 = data->preempt_timestamp;
165 T1 = ftrace_now(cpu); 159 T1 = ftrace_now(cpu);
166 delta = T1-T0; 160 delta = T1-T0;
@@ -168,13 +162,10 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
168 if (!report_latency(delta)) 162 if (!report_latency(delta))
169 goto out_unlock; 163 goto out_unlock;
170 164
171 latency = nsecs_to_usecs(delta); 165 if (likely(!is_tracing_stopped())) {
172 166 tracing_max_latency = delta;
173 tracing_max_latency = delta; 167 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
174 t0 = nsecs_to_usecs(T0); 168 }
175 t1 = nsecs_to_usecs(T1);
176
177 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 169
179out_unlock: 170out_unlock:
180 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
@@ -186,11 +177,6 @@ out:
186 177
187static void __wakeup_reset(struct trace_array *tr) 178static void __wakeup_reset(struct trace_array *tr)
188{ 179{
189 int cpu;
190
191 for_each_possible_cpu(cpu)
192 tracing_reset(tr, cpu);
193
194 wakeup_cpu = -1; 180 wakeup_cpu = -1;
195 wakeup_prio = -1; 181 wakeup_prio = -1;
196 182
@@ -204,6 +190,8 @@ static void wakeup_reset(struct trace_array *tr)
204{ 190{
205 unsigned long flags; 191 unsigned long flags;
206 192
193 tracing_reset_online_cpus(tr);
194
207 local_irq_save(flags); 195 local_irq_save(flags);
208 __raw_spin_lock(&wakeup_lock); 196 __raw_spin_lock(&wakeup_lock);
209 __wakeup_reset(tr); 197 __wakeup_reset(tr);
@@ -247,6 +235,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
247 __wakeup_reset(wakeup_trace); 235 __wakeup_reset(wakeup_trace);
248 236
249 wakeup_cpu = task_cpu(p); 237 wakeup_cpu = task_cpu(p);
238 wakeup_current_cpu = wakeup_cpu;
250 wakeup_prio = p->prio; 239 wakeup_prio = p->prio;
251 240
252 wakeup_task = p; 241 wakeup_task = p;
@@ -296,6 +285,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
296 goto fail_deprobe_wake_new; 285 goto fail_deprobe_wake_new;
297 } 286 }
298 287
288 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task);
289 if (ret) {
290 pr_info("wakeup trace: Couldn't activate tracepoint"
291 " probe to kernel_sched_migrate_task\n");
292 return;
293 }
294
299 wakeup_reset(tr); 295 wakeup_reset(tr);
300 296
301 /* 297 /*
@@ -328,6 +324,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
328 unregister_trace_sched_switch(probe_wakeup_sched_switch); 324 unregister_trace_sched_switch(probe_wakeup_sched_switch);
329 unregister_trace_sched_wakeup_new(probe_wakeup); 325 unregister_trace_sched_wakeup_new(probe_wakeup);
330 unregister_trace_sched_wakeup(probe_wakeup); 326 unregister_trace_sched_wakeup(probe_wakeup);
327 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task);
331} 328}
332 329
333static int __wakeup_tracer_init(struct trace_array *tr) 330static int __wakeup_tracer_init(struct trace_array *tr)
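The wakeup tracer now mirrors the traced task's CPU from the sched_migrate_task tracepoint, so the hot ftrace callback can bail out with a single compare instead of taking wakeup_lock and calling task_cpu() as the deleted lines did. A kernel-only sketch of that split, restating the hunks above with the reasoning spelled out in comments:

static int wakeup_current_cpu;		/* shadow of the traced task's CPU */

static void probe_wakeup_migrate_task(struct task_struct *task, int cpu)
{
	if (task != wakeup_task)
		return;
	/* updated from the migration path, so readers never need wakeup_lock */
	wakeup_current_cpu = cpu;
}

static void wakeup_tracer_call_fast_path(unsigned long ip, unsigned long parent_ip)
{
	int cpu = raw_smp_processor_id();

	/*
	 * Previously: take wakeup_lock, check wakeup_task, compare
	 * task_cpu(wakeup_task). Now one compare against the shadow is enough.
	 */
	if (cpu != wakeup_current_cpu)
		return;

	/* ... trace_function() etc. as in the real callback ... */
}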
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 00dd6485bdd7..d2cdbabb4ead 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -288,6 +288,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
288 * to detect and recover from possible hangs 288 * to detect and recover from possible hangs
289 */ 289 */
290 tracing_reset_online_cpus(tr); 290 tracing_reset_online_cpus(tr);
291 set_graph_array(tr);
291 ret = register_ftrace_graph(&trace_graph_return, 292 ret = register_ftrace_graph(&trace_graph_return,
292 &trace_graph_entry_watchdog); 293 &trace_graph_entry_watchdog);
293 if (ret) { 294 if (ret) {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index e644af910124..8504ac71e4e8 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -186,43 +186,33 @@ static const struct file_operations stack_max_size_fops = {
186}; 186};
187 187
188static void * 188static void *
189t_next(struct seq_file *m, void *v, loff_t *pos) 189__next(struct seq_file *m, loff_t *pos)
190{ 190{
191 long i; 191 long n = *pos - 1;
192
193 (*pos)++;
194
195 if (v == SEQ_START_TOKEN)
196 i = 0;
197 else {
198 i = *(long *)v;
199 i++;
200 }
201 192
202 if (i >= max_stack_trace.nr_entries || 193 if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
203 stack_dump_trace[i] == ULONG_MAX)
204 return NULL; 194 return NULL;
205 195
206 m->private = (void *)i; 196 m->private = (void *)n;
207
208 return &m->private; 197 return &m->private;
209} 198}
210 199
211static void *t_start(struct seq_file *m, loff_t *pos) 200static void *
201t_next(struct seq_file *m, void *v, loff_t *pos)
212{ 202{
213 void *t = SEQ_START_TOKEN; 203 (*pos)++;
214 loff_t l = 0; 204 return __next(m, pos);
205}
215 206
207static void *t_start(struct seq_file *m, loff_t *pos)
208{
216 local_irq_disable(); 209 local_irq_disable();
217 __raw_spin_lock(&max_stack_lock); 210 __raw_spin_lock(&max_stack_lock);
218 211
219 if (*pos == 0) 212 if (*pos == 0)
220 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
221 214
222 for (; t && l < *pos; t = t_next(m, t, &l)) 215 return __next(m, pos);
223 ;
224
225 return t;
226} 216}
227 217
228static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
@@ -234,15 +224,8 @@ static void t_stop(struct seq_file *m, void *p)
234static int trace_lookup_stack(struct seq_file *m, long i) 224static int trace_lookup_stack(struct seq_file *m, long i)
235{ 225{
236 unsigned long addr = stack_dump_trace[i]; 226 unsigned long addr = stack_dump_trace[i];
237#ifdef CONFIG_KALLSYMS
238 char str[KSYM_SYMBOL_LEN];
239 227
240 sprint_symbol(str, addr); 228 return seq_printf(m, "%pF\n", (void *)addr);
241
242 return seq_printf(m, "%s\n", str);
243#else
244 return seq_printf(m, "%p\n", (void*)addr);
245#endif
246} 229}
247 230
248static void print_disabled(struct seq_file *m) 231static void print_disabled(struct seq_file *m)
@@ -301,29 +284,26 @@ static const struct seq_operations stack_trace_seq_ops = {
301 284
302static int stack_trace_open(struct inode *inode, struct file *file) 285static int stack_trace_open(struct inode *inode, struct file *file)
303{ 286{
304 int ret; 287 return seq_open(file, &stack_trace_seq_ops);
305
306 ret = seq_open(file, &stack_trace_seq_ops);
307
308 return ret;
309} 288}
310 289
311static const struct file_operations stack_trace_fops = { 290static const struct file_operations stack_trace_fops = {
312 .open = stack_trace_open, 291 .open = stack_trace_open,
313 .read = seq_read, 292 .read = seq_read,
314 .llseek = seq_lseek, 293 .llseek = seq_lseek,
294 .release = seq_release,
315}; 295};
316 296
317int 297int
318stack_trace_sysctl(struct ctl_table *table, int write, 298stack_trace_sysctl(struct ctl_table *table, int write,
319 struct file *file, void __user *buffer, size_t *lenp, 299 void __user *buffer, size_t *lenp,
320 loff_t *ppos) 300 loff_t *ppos)
321{ 301{
322 int ret; 302 int ret;
323 303
324 mutex_lock(&stack_sysctl_mutex); 304 mutex_lock(&stack_sysctl_mutex);
325 305
326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 306 ret = proc_dointvec(table, write, buffer, lenp, ppos);
327 307
328 if (ret || !write || 308 if (ret || !write ||
329 (last_stack_tracer_enabled == !!stack_tracer_enabled)) 309 (last_stack_tracer_enabled == !!stack_tracer_enabled))
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index e66f5e493342..a4bb239eb987 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -49,7 +49,8 @@ static struct dentry *stat_dir;
49 * but it will at least advance closer to the next one 49 * but it will at least advance closer to the next one
50 * to be released. 50 * to be released.
51 */ 51 */
52static struct rb_node *release_next(struct rb_node *node) 52static struct rb_node *release_next(struct tracer_stat *ts,
53 struct rb_node *node)
53{ 54{
54 struct stat_node *snode; 55 struct stat_node *snode;
55 struct rb_node *parent = rb_parent(node); 56 struct rb_node *parent = rb_parent(node);
@@ -67,26 +68,35 @@ static struct rb_node *release_next(struct rb_node *node)
67 parent->rb_right = NULL; 68 parent->rb_right = NULL;
68 69
69 snode = container_of(node, struct stat_node, node); 70 snode = container_of(node, struct stat_node, node);
71 if (ts->stat_release)
72 ts->stat_release(snode->stat);
70 kfree(snode); 73 kfree(snode);
71 74
72 return parent; 75 return parent;
73 } 76 }
74} 77}
75 78
76static void reset_stat_session(struct stat_session *session) 79static void __reset_stat_session(struct stat_session *session)
77{ 80{
78 struct rb_node *node = session->stat_root.rb_node; 81 struct rb_node *node = session->stat_root.rb_node;
79 82
80 while (node) 83 while (node)
81 node = release_next(node); 84 node = release_next(session->ts, node);
82 85
83 session->stat_root = RB_ROOT; 86 session->stat_root = RB_ROOT;
84} 87}
85 88
89static void reset_stat_session(struct stat_session *session)
90{
91 mutex_lock(&session->stat_mutex);
92 __reset_stat_session(session);
93 mutex_unlock(&session->stat_mutex);
94}
95
86static void destroy_session(struct stat_session *session) 96static void destroy_session(struct stat_session *session)
87{ 97{
88 debugfs_remove(session->file); 98 debugfs_remove(session->file);
89 reset_stat_session(session); 99 __reset_stat_session(session);
90 mutex_destroy(&session->stat_mutex); 100 mutex_destroy(&session->stat_mutex);
91 kfree(session); 101 kfree(session);
92} 102}
@@ -150,7 +160,7 @@ static int stat_seq_init(struct stat_session *session)
150 int i; 160 int i;
151 161
152 mutex_lock(&session->stat_mutex); 162 mutex_lock(&session->stat_mutex);
153 reset_stat_session(session); 163 __reset_stat_session(session);
154 164
155 if (!ts->stat_cmp) 165 if (!ts->stat_cmp)
156 ts->stat_cmp = dummy_cmp; 166 ts->stat_cmp = dummy_cmp;
@@ -183,7 +193,7 @@ exit:
183 return ret; 193 return ret;
184 194
185exit_free_rbtree: 195exit_free_rbtree:
186 reset_stat_session(session); 196 __reset_stat_session(session);
187 mutex_unlock(&session->stat_mutex); 197 mutex_unlock(&session->stat_mutex);
188 return ret; 198 return ret;
189} 199}
@@ -193,17 +203,21 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
193{ 203{
194 struct stat_session *session = s->private; 204 struct stat_session *session = s->private;
195 struct rb_node *node; 205 struct rb_node *node;
206 int n = *pos;
196 int i; 207 int i;
197 208
198 /* Prevent from tracer switch or rbtree modification */ 209 /* Prevent from tracer switch or rbtree modification */
199 mutex_lock(&session->stat_mutex); 210 mutex_lock(&session->stat_mutex);
200 211
201 /* If we are in the beginning of the file, print the headers */ 212 /* If we are in the beginning of the file, print the headers */
202 if (!*pos && session->ts->stat_headers) 213 if (session->ts->stat_headers) {
203 return SEQ_START_TOKEN; 214 if (n == 0)
215 return SEQ_START_TOKEN;
216 n--;
217 }
204 218
205 node = rb_first(&session->stat_root); 219 node = rb_first(&session->stat_root);
206 for (i = 0; node && i < *pos; i++) 220 for (i = 0; node && i < n; i++)
207 node = rb_next(node); 221 node = rb_next(node);
208 222
209 return node; 223 return node;
@@ -250,16 +264,21 @@ static const struct seq_operations trace_stat_seq_ops = {
250static int tracing_stat_open(struct inode *inode, struct file *file) 264static int tracing_stat_open(struct inode *inode, struct file *file)
251{ 265{
252 int ret; 266 int ret;
253 267 struct seq_file *m;
254 struct stat_session *session = inode->i_private; 268 struct stat_session *session = inode->i_private;
255 269
270 ret = stat_seq_init(session);
271 if (ret)
272 return ret;
273
256 ret = seq_open(file, &trace_stat_seq_ops); 274 ret = seq_open(file, &trace_stat_seq_ops);
257 if (!ret) { 275 if (ret) {
258 struct seq_file *m = file->private_data; 276 reset_stat_session(session);
259 m->private = session; 277 return ret;
260 ret = stat_seq_init(session);
261 } 278 }
262 279
280 m = file->private_data;
281 m->private = session;
263 return ret; 282 return ret;
264} 283}
265 284
@@ -270,11 +289,9 @@ static int tracing_stat_release(struct inode *i, struct file *f)
270{ 289{
271 struct stat_session *session = i->i_private; 290 struct stat_session *session = i->i_private;
272 291
273 mutex_lock(&session->stat_mutex);
274 reset_stat_session(session); 292 reset_stat_session(session);
275 mutex_unlock(&session->stat_mutex);
276 293
277 return 0; 294 return seq_release(i, f);
278} 295}
279 296
280static const struct file_operations tracing_stat_fops = { 297static const struct file_operations tracing_stat_fops = {
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index f3546a2cd826..8f03914b9a6a 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -18,6 +18,8 @@ struct tracer_stat {
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */ 19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p); 20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Release an entry */
22 void (*stat_release)(void *stat);
21 /* Print the headers of your stat entries */ 23 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s); 24 int (*stat_headers)(struct seq_file *s);
23}; 25};
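With the new stat_release callback, release_next() gives a tracer a hook to free whatever it allocated per stat entry when the rbtree is torn down. A hypothetical registration sketch: my_stat_release and the other my_* callbacks are placeholders, and the remaining tracer_stat fields plus register_stat_tracer() are assumed from the parts of trace_stat.h not shown in this hunk.

/* Kernel-only sketch of where stat_release fits. */

static void my_stat_release(void *stat)
{
	/* called once per entry from release_next() when the session is reset */
	kfree(stat);
}

static struct tracer_stat my_tracer_stat = {
	.name		= "my_tracer",
	.stat_start	= my_stat_start,
	.stat_next	= my_stat_next,
	.stat_show	= my_stat_show,
	.stat_release	= my_stat_release,	/* new hook added by this patch */
	.stat_headers	= my_stat_headers,
};

/* registered as usual: register_stat_tracer(&my_tracer_stat); */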
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e579645ac86..9fbce6c9d2e1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,30 +1,18 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h>
2#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/ftrace.h>
5#include <linux/perf_event.h>
3#include <asm/syscall.h> 6#include <asm/syscall.h>
4 7
5#include "trace_output.h" 8#include "trace_output.h"
6#include "trace.h" 9#include "trace.h"
7 10
8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Prevent from races on thread flags toggling */
12static DEFINE_MUTEX(syscall_trace_lock); 11static DEFINE_MUTEX(syscall_trace_lock);
13 12static int sys_refcount_enter;
14/* Option to display the parameters types */ 13static int sys_refcount_exit;
15enum { 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16 TRACE_SYSCALLS_OPT_TYPES = 0x1, 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
17};
18
19static struct tracer_opt syscalls_opts[] = {
20 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
21 { }
22};
23
24static struct tracer_flags syscalls_flags = {
25 .val = 0, /* By default: no parameters types */
26 .opts = syscalls_opts
27};
28 16
29enum print_line_t 17enum print_line_t
30print_syscall_enter(struct trace_iterator *iter, int flags) 18print_syscall_enter(struct trace_iterator *iter, int flags)
@@ -35,35 +23,46 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
35 struct syscall_metadata *entry; 23 struct syscall_metadata *entry;
36 int i, ret, syscall; 24 int i, ret, syscall;
37 25
38 trace_assign_type(trace, ent); 26 trace = (typeof(trace))ent;
39
40 syscall = trace->nr; 27 syscall = trace->nr;
41
42 entry = syscall_nr_to_meta(syscall); 28 entry = syscall_nr_to_meta(syscall);
29
43 if (!entry) 30 if (!entry)
44 goto end; 31 goto end;
45 32
33 if (entry->enter_id != ent->type) {
34 WARN_ON_ONCE(1);
35 goto end;
36 }
37
46 ret = trace_seq_printf(s, "%s(", entry->name); 38 ret = trace_seq_printf(s, "%s(", entry->name);
47 if (!ret) 39 if (!ret)
48 return TRACE_TYPE_PARTIAL_LINE; 40 return TRACE_TYPE_PARTIAL_LINE;
49 41
50 for (i = 0; i < entry->nb_args; i++) { 42 for (i = 0; i < entry->nb_args; i++) {
51 /* parameter types */ 43 /* parameter types */
52 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { 44 if (trace_flags & TRACE_ITER_VERBOSE) {
53 ret = trace_seq_printf(s, "%s ", entry->types[i]); 45 ret = trace_seq_printf(s, "%s ", entry->types[i]);
54 if (!ret) 46 if (!ret)
55 return TRACE_TYPE_PARTIAL_LINE; 47 return TRACE_TYPE_PARTIAL_LINE;
56 } 48 }
57 /* parameter values */ 49 /* parameter values */
58 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], 50 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
59 trace->args[i], 51 trace->args[i],
60 i == entry->nb_args - 1 ? ")" : ","); 52 i == entry->nb_args - 1 ? "" : ", ");
61 if (!ret) 53 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE; 54 return TRACE_TYPE_PARTIAL_LINE;
63 } 55 }
64 56
57 ret = trace_seq_putc(s, ')');
58 if (!ret)
59 return TRACE_TYPE_PARTIAL_LINE;
60
65end: 61end:
66 trace_seq_printf(s, "\n"); 62 ret = trace_seq_putc(s, '\n');
63 if (!ret)
64 return TRACE_TYPE_PARTIAL_LINE;
65
67 return TRACE_TYPE_HANDLED; 66 return TRACE_TYPE_HANDLED;
68} 67}
69 68
@@ -77,16 +76,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
77 struct syscall_metadata *entry; 76 struct syscall_metadata *entry;
78 int ret; 77 int ret;
79 78
80 trace_assign_type(trace, ent); 79 trace = (typeof(trace))ent;
81
82 syscall = trace->nr; 80 syscall = trace->nr;
83
84 entry = syscall_nr_to_meta(syscall); 81 entry = syscall_nr_to_meta(syscall);
82
85 if (!entry) { 83 if (!entry) {
86 trace_seq_printf(s, "\n"); 84 trace_seq_printf(s, "\n");
87 return TRACE_TYPE_HANDLED; 85 return TRACE_TYPE_HANDLED;
88 } 86 }
89 87
88 if (entry->exit_id != ent->type) {
89 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED;
91 }
92
90 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 93 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
91 trace->ret); 94 trace->ret);
92 if (!ret) 95 if (!ret)
@@ -95,62 +98,140 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
95 return TRACE_TYPE_HANDLED; 98 return TRACE_TYPE_HANDLED;
96} 99}
97 100
98void start_ftrace_syscalls(void) 101extern char *__bad_type_size(void);
102
103#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
107
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
99{ 109{
100 unsigned long flags; 110 int i;
101 struct task_struct *g, *t; 111 int nr;
112 int ret;
113 struct syscall_metadata *entry;
114 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args);
102 116
103 mutex_lock(&syscall_trace_lock); 117 nr = syscall_name_to_nr(call->data);
118 entry = syscall_nr_to_meta(nr);
104 119
105 /* Don't enable the flag on the tasks twice */ 120 if (!entry)
106 if (++refcount != 1) 121 return 0;
107 goto unlock;
108 122
109 arch_init_ftrace_syscalls(); 123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
110 read_lock_irqsave(&tasklist_lock, flags); 124 SYSCALL_FIELD(int, nr));
125 if (!ret)
126 return 0;
111 127
112 do_each_thread(g, t) { 128 for (i = 0; i < entry->nb_args; i++) {
113 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 129 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
114 } while_each_thread(g, t); 130 entry->args[i]);
131 if (!ret)
132 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
134 sizeof(unsigned long));
135 if (!ret)
136 return 0;
137 offset += sizeof(unsigned long);
138 }
115 139
116 read_unlock_irqrestore(&tasklist_lock, flags); 140 trace_seq_puts(s, "\nprint fmt: \"");
141 for (i = 0; i < entry->nb_args; i++) {
142 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
143 sizeof(unsigned long),
144 i == entry->nb_args - 1 ? "" : ", ");
145 if (!ret)
146 return 0;
147 }
148 trace_seq_putc(s, '"');
117 149
118unlock: 150 for (i = 0; i < entry->nb_args; i++) {
119 mutex_unlock(&syscall_trace_lock); 151 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
152 entry->args[i]);
153 if (!ret)
154 return 0;
155 }
156
157 return trace_seq_putc(s, '\n');
120} 158}
121 159
122void stop_ftrace_syscalls(void) 160int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
123{ 161{
124 unsigned long flags; 162 int ret;
125 struct task_struct *g, *t; 163 struct syscall_trace_exit trace;
126 164
127 mutex_lock(&syscall_trace_lock); 165 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
168 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(unsigned long, ret));
170 if (!ret)
171 return 0;
128 172
129 /* There are perhaps still some users */ 173 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
130 if (--refcount) 174}
131 goto unlock;
132 175
133 read_lock_irqsave(&tasklist_lock, flags); 176int syscall_enter_define_fields(struct ftrace_event_call *call)
177{
178 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta;
180 int ret;
181 int nr;
182 int i;
183 int offset = offsetof(typeof(trace), args);
184
185 nr = syscall_name_to_nr(call->data);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret)
193 return ret;
194
195 for (i = 0; i < meta->nb_args; i++) {
196 ret = trace_define_field(call, meta->types[i],
197 meta->args[i], offset,
198 sizeof(unsigned long), 0,
199 FILTER_OTHER);
200 offset += sizeof(unsigned long);
201 }
134 202
135 do_each_thread(g, t) { 203 return ret;
136 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 204}
137 } while_each_thread(g, t);
138 205
139 read_unlock_irqrestore(&tasklist_lock, flags); 206int syscall_exit_define_fields(struct ftrace_event_call *call)
207{
208 struct syscall_trace_exit trace;
209 int ret;
140 210
141unlock: 211 ret = trace_define_common_fields(call);
142 mutex_unlock(&syscall_trace_lock); 212 if (ret)
213 return ret;
214
215 ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
216 FILTER_OTHER);
217
218 return ret;
143} 219}
144 220
145void ftrace_syscall_enter(struct pt_regs *regs) 221void ftrace_syscall_enter(struct pt_regs *regs, long id)
146{ 222{
147 struct syscall_trace_enter *entry; 223 struct syscall_trace_enter *entry;
148 struct syscall_metadata *sys_data; 224 struct syscall_metadata *sys_data;
149 struct ring_buffer_event *event; 225 struct ring_buffer_event *event;
226 struct ring_buffer *buffer;
150 int size; 227 int size;
151 int syscall_nr; 228 int syscall_nr;
152 229
153 syscall_nr = syscall_get_nr(current, regs); 230 syscall_nr = syscall_get_nr(current, regs);
231 if (syscall_nr < 0)
232 return;
233 if (!test_bit(syscall_nr, enabled_enter_syscalls))
234 return;
154 235
155 sys_data = syscall_nr_to_meta(syscall_nr); 236 sys_data = syscall_nr_to_meta(syscall_nr);
156 if (!sys_data) 237 if (!sys_data)
@@ -158,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
158 239
159 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
160 241
161 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, 242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
162 0, 0); 243 size, 0, 0);
163 if (!event) 244 if (!event)
164 return; 245 return;
165 246
@@ -167,24 +248,30 @@ void ftrace_syscall_enter(struct pt_regs *regs)
167 entry->nr = syscall_nr; 248 entry->nr = syscall_nr;
168 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 249 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
169 250
170 trace_current_buffer_unlock_commit(event, 0, 0); 251 if (!filter_current_check_discard(buffer, sys_data->enter_event,
171 trace_wake_up(); 252 entry, event))
253 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
172} 254}
173 255
174void ftrace_syscall_exit(struct pt_regs *regs) 256void ftrace_syscall_exit(struct pt_regs *regs, long ret)
175{ 257{
176 struct syscall_trace_exit *entry; 258 struct syscall_trace_exit *entry;
177 struct syscall_metadata *sys_data; 259 struct syscall_metadata *sys_data;
178 struct ring_buffer_event *event; 260 struct ring_buffer_event *event;
261 struct ring_buffer *buffer;
179 int syscall_nr; 262 int syscall_nr;
180 263
181 syscall_nr = syscall_get_nr(current, regs); 264 syscall_nr = syscall_get_nr(current, regs);
265 if (syscall_nr < 0)
266 return;
267 if (!test_bit(syscall_nr, enabled_exit_syscalls))
268 return;
182 269
183 sys_data = syscall_nr_to_meta(syscall_nr); 270 sys_data = syscall_nr_to_meta(syscall_nr);
184 if (!sys_data) 271 if (!sys_data)
185 return; 272 return;
186 273
187 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, 274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
188 sizeof(*entry), 0, 0); 275 sizeof(*entry), 0, 0);
189 if (!event) 276 if (!event)
190 return; 277 return;
@@ -193,58 +280,303 @@ void ftrace_syscall_exit(struct pt_regs *regs)
193 entry->nr = syscall_nr; 280 entry->nr = syscall_nr;
194 entry->ret = syscall_get_return_value(current, regs); 281 entry->ret = syscall_get_return_value(current, regs);
195 282
196 trace_current_buffer_unlock_commit(event, 0, 0); 283 if (!filter_current_check_discard(buffer, sys_data->exit_event,
197 trace_wake_up(); 284 entry, event))
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
198} 286}
199 287
200static int init_syscall_tracer(struct trace_array *tr) 288int reg_event_syscall_enter(void *ptr)
201{ 289{
202 start_ftrace_syscalls(); 290 int ret = 0;
291 int num;
292 char *name;
293
294 name = (char *)ptr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++;
307 }
308 mutex_unlock(&syscall_trace_lock);
309 return ret;
310}
311
312void unreg_event_syscall_enter(void *ptr)
313{
314 int num;
315 char *name;
203 316
204 return 0; 317 name = (char *)ptr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls)
320 return;
321 mutex_lock(&syscall_trace_lock);
322 sys_refcount_enter--;
323 clear_bit(num, enabled_enter_syscalls);
324 if (!sys_refcount_enter)
325 unregister_trace_sys_enter(ftrace_syscall_enter);
326 mutex_unlock(&syscall_trace_lock);
205} 327}
206 328
207static void reset_syscall_tracer(struct trace_array *tr) 329int reg_event_syscall_exit(void *ptr)
208{ 330{
209 stop_ftrace_syscalls(); 331 int ret = 0;
210 tracing_reset_online_cpus(tr); 332 int num;
333 char *name;
334
335 name = (char *)ptr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++;
348 }
349 mutex_unlock(&syscall_trace_lock);
350 return ret;
211} 351}
212 352
213static struct trace_event syscall_enter_event = { 353void unreg_event_syscall_exit(void *ptr)
214 .type = TRACE_SYSCALL_ENTER, 354{
215 .trace = print_syscall_enter, 355 int num;
216}; 356 char *name;
357
358 name = (char *)ptr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls)
361 return;
362 mutex_lock(&syscall_trace_lock);
363 sys_refcount_exit--;
364 clear_bit(num, enabled_exit_syscalls);
365 if (!sys_refcount_exit)
366 unregister_trace_sys_exit(ftrace_syscall_exit);
367 mutex_unlock(&syscall_trace_lock);
368}
217 369
218static struct trace_event syscall_exit_event = { 370struct trace_event event_syscall_enter = {
219 .type = TRACE_SYSCALL_EXIT, 371 .trace = print_syscall_enter,
220 .trace = print_syscall_exit,
221}; 372};
222 373
223static struct tracer syscall_tracer __read_mostly = { 374struct trace_event event_syscall_exit = {
224 .name = "syscall", 375 .trace = print_syscall_exit,
225 .init = init_syscall_tracer,
226 .reset = reset_syscall_tracer,
227 .flags = &syscalls_flags,
228}; 376};
229 377
230__init int register_ftrace_syscalls(void) 378#ifdef CONFIG_EVENT_PROFILE
379
380static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
381static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
382static int sys_prof_refcount_enter;
383static int sys_prof_refcount_exit;
384
385static void prof_syscall_enter(struct pt_regs *regs, long id)
231{ 386{
232 int ret; 387 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec;
389 unsigned long flags;
390 char *raw_data;
391 int syscall_nr;
392 int size;
393 int cpu;
233 394
234 ret = register_ftrace_event(&syscall_enter_event); 395 syscall_nr = syscall_get_nr(current, regs);
235 if (!ret) { 396 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
236 printk(KERN_WARNING "event %d failed to register\n", 397 return;
237 syscall_enter_event.type); 398
238 WARN_ON_ONCE(1); 399 sys_data = syscall_nr_to_meta(syscall_nr);
400 if (!sys_data)
401 return;
402
403 /* get the size after alignment with the u32 buffer size field */
404 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
405 size = ALIGN(size + sizeof(u32), sizeof(u64));
406 size -= sizeof(u32);
407
408 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
409 "profile buffer not large enough"))
410 return;
411
412 /* Protect the per cpu buffer, begin the rcu read side */
413 local_irq_save(flags);
414
415 cpu = smp_processor_id();
416
417 if (in_nmi())
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421
422 if (!raw_data)
423 goto end;
424
425 raw_data = per_cpu_ptr(raw_data, cpu);
426
427 /* zero the dead bytes from align to not leak stack to user */
428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
429
430 rec = (struct syscall_trace_enter *) raw_data;
431 tracing_generic_entry_update(&rec->ent, 0, 0);
432 rec->ent.type = sys_data->enter_id;
433 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args);
436 perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
437
438end:
439 local_irq_restore(flags);
440}
441
442int reg_prof_syscall_enter(char *name)
443{
444 int ret = 0;
445 int num;
446
447 num = syscall_name_to_nr(name);
448 if (num < 0 || num >= NR_syscalls)
449 return -ENOSYS;
450
451 mutex_lock(&syscall_trace_lock);
452 if (!sys_prof_refcount_enter)
453 ret = register_trace_sys_enter(prof_syscall_enter);
454 if (ret) {
455 pr_info("event trace: Could not activate"
456 "syscall entry trace point");
457 } else {
458 set_bit(num, enabled_prof_enter_syscalls);
459 sys_prof_refcount_enter++;
239 } 460 }
461 mutex_unlock(&syscall_trace_lock);
462 return ret;
463}
240 464
241 ret = register_ftrace_event(&syscall_exit_event); 465void unreg_prof_syscall_enter(char *name)
242 if (!ret) { 466{
243 printk(KERN_WARNING "event %d failed to register\n", 467 int num;
244 syscall_exit_event.type); 468
245 WARN_ON_ONCE(1); 469 num = syscall_name_to_nr(name);
470 if (num < 0 || num >= NR_syscalls)
471 return;
472
473 mutex_lock(&syscall_trace_lock);
474 sys_prof_refcount_enter--;
475 clear_bit(num, enabled_prof_enter_syscalls);
476 if (!sys_prof_refcount_enter)
477 unregister_trace_sys_enter(prof_syscall_enter);
478 mutex_unlock(&syscall_trace_lock);
479}
480
481static void prof_syscall_exit(struct pt_regs *regs, long ret)
482{
483 struct syscall_metadata *sys_data;
484 struct syscall_trace_exit *rec;
485 unsigned long flags;
486 int syscall_nr;
487 char *raw_data;
488 int size;
489 int cpu;
490
491 syscall_nr = syscall_get_nr(current, regs);
492 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
493 return;
494
495 sys_data = syscall_nr_to_meta(syscall_nr);
496 if (!sys_data)
497 return;
498
499 /* We could probably compute this size at build time */
500 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
501 size -= sizeof(u32);
502
503 /*
504 * Should be impossible today, but stay paranoid about future changes.
505 * Ideally this check would happen at build time rather than at runtime.
506 */
507 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
508 "exit event has grown above profile buffer size"))
509 return;
510
511 /* Protect the per cpu buffer, begin the rcu read side */
512 local_irq_save(flags);
513 cpu = smp_processor_id();
514
515 if (in_nmi())
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519
520 if (!raw_data)
521 goto end;
522
523 raw_data = per_cpu_ptr(raw_data, cpu);
524
525 /* zero the padding bytes added by alignment so we don't leak stack data to userspace */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
527
528 rec = (struct syscall_trace_exit *)raw_data;
529
530 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id;
532 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs);
534
535 perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
536
537end:
538 local_irq_restore(flags);
539}
540
541int reg_prof_syscall_exit(char *name)
542{
543 int ret = 0;
544 int num;
545
546 num = syscall_name_to_nr(name);
547 if (num < 0 || num >= NR_syscalls)
548 return -ENOSYS;
549
550 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit)
552 ret = register_trace_sys_exit(prof_syscall_exit);
553 if (ret) {
554 pr_info("event trace: Could not activate"
555 " syscall exit trace point\n");
556 } else {
557 set_bit(num, enabled_prof_exit_syscalls);
558 sys_prof_refcount_exit++;
246 } 559 }
560 mutex_unlock(&syscall_trace_lock);
561 return ret;
562}
563
564void unreg_prof_syscall_exit(char *name)
565{
566 int num;
567
568 num = syscall_name_to_nr(name);
569 if (num < 0 || num >= NR_syscalls)
570 return;
247 571
248 return register_tracer(&syscall_tracer); 572 mutex_lock(&syscall_trace_lock);
573 sys_prof_refcount_exit--;
574 clear_bit(num, enabled_prof_exit_syscalls);
575 if (!sys_prof_refcount_exit)
576 unregister_trace_sys_exit(prof_syscall_exit);
577 mutex_unlock(&syscall_trace_lock);
249} 578}
250device_initcall(register_ftrace_syscalls); 579
580#endif
581
582
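
Taken together, reg_prof_syscall_enter()/unreg_prof_syscall_enter() and their exit counterparts above follow one pattern: a global refcount decides when the shared tracepoint handler is (un)registered, while a per-syscall bitmap decides which syscalls actually emit profile events, all under syscall_trace_lock. A rough user-space sketch of that shape follows; the 512-entry table size, the function names and the always-succeeding register stub are invented for illustration and are not kernel APIs:

	#include <pthread.h>
	#include <stdio.h>

	#define NR_SYSCALLS	512			/* assumed table size */
	#define BITS_PER_LONG	(8 * sizeof(unsigned long))

	static unsigned long enabled[NR_SYSCALLS / 32];	/* large enough on 32- and 64-bit */
	static int refcount;
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static void set_enabled(int nr)
	{
		enabled[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
	}

	static void clear_enabled(int nr)
	{
		enabled[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
	}

	/* stand-in for register_trace_sys_enter(); always succeeds here */
	static int register_handler(void)
	{
		printf("shared handler registered\n");
		return 0;
	}

	static void unregister_handler(void)
	{
		printf("shared handler unregistered\n");
	}

	int reg_syscall(int nr)
	{
		int ret = 0;

		pthread_mutex_lock(&lock);
		if (!refcount)			/* first user hooks the tracepoint */
			ret = register_handler();
		if (!ret) {
			set_enabled(nr);	/* this syscall now emits events */
			refcount++;
		}
		pthread_mutex_unlock(&lock);
		return ret;
	}

	void unreg_syscall(int nr)
	{
		pthread_mutex_lock(&lock);
		refcount--;
		clear_enabled(nr);
		if (!refcount)			/* last user unhooks the tracepoint */
			unregister_handler();
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		reg_syscall(1);
		reg_syscall(2);
		unreg_syscall(1);
		unreg_syscall(2);		/* handler goes away here */
		return 0;
	}

The per-event fast path then only needs the cheap test_bit() check, while the expensive registration work happens once per direction, no matter how many syscalls are being profiled.
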
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 97fcea4acce1..40cafb07dffd 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/kref.h>
12#include "trace_stat.h" 13#include "trace_stat.h"
13#include "trace.h" 14#include "trace.h"
14 15
@@ -16,6 +17,7 @@
16/* A cpu workqueue thread */ 17/* A cpu workqueue thread */
17struct cpu_workqueue_stats { 18struct cpu_workqueue_stats {
18 struct list_head list; 19 struct list_head list;
20 struct kref kref;
19 int cpu; 21 int cpu;
20 pid_t pid; 22 pid_t pid;
21/* Can be inserted from interrupt or user context, need to be atomic */ 23/* Can be inserted from interrupt or user context, need to be atomic */
@@ -39,6 +41,11 @@ struct workqueue_global_stats {
39static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); 41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
40#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) 42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
41 43
44static void cpu_workqueue_stat_free(struct kref *kref)
45{
46 kfree(container_of(kref, struct cpu_workqueue_stats, kref));
47}
48
42/* Insertion of a work */ 49/* Insertion of a work */
43static void 50static void
44probe_workqueue_insertion(struct task_struct *wq_thread, 51probe_workqueue_insertion(struct task_struct *wq_thread,
@@ -96,8 +103,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
96 return; 103 return;
97 } 104 }
98 INIT_LIST_HEAD(&cws->list); 105 INIT_LIST_HEAD(&cws->list);
106 kref_init(&cws->kref);
99 cws->cpu = cpu; 107 cws->cpu = cpu;
100
101 cws->pid = wq_thread->pid; 108 cws->pid = wq_thread->pid;
102 109
103 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 110 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
@@ -118,7 +125,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
118 list) { 125 list) {
119 if (node->pid == wq_thread->pid) { 126 if (node->pid == wq_thread->pid) {
120 list_del(&node->list); 127 list_del(&node->list);
121 kfree(node); 128 kref_put(&node->kref, cpu_workqueue_stat_free);
122 goto found; 129 goto found;
123 } 130 }
124 } 131 }
@@ -137,9 +144,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
137 144
138 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 145 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
139 146
140 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) 147 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
141 ret = list_entry(workqueue_cpu_stat(cpu)->list.next, 148 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
142 struct cpu_workqueue_stats, list); 149 struct cpu_workqueue_stats, list);
150 kref_get(&ret->kref);
151 }
143 152
144 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 153 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
145 154
@@ -162,9 +171,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
162static void *workqueue_stat_next(void *prev, int idx) 171static void *workqueue_stat_next(void *prev, int idx)
163{ 172{
164 struct cpu_workqueue_stats *prev_cws = prev; 173 struct cpu_workqueue_stats *prev_cws = prev;
174 struct cpu_workqueue_stats *ret;
165 int cpu = prev_cws->cpu; 175 int cpu = prev_cws->cpu;
166 unsigned long flags; 176 unsigned long flags;
167 void *ret = NULL;
168 177
169 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 178 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
170 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { 179 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
@@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
175 return NULL; 184 return NULL;
176 } while (!(ret = workqueue_stat_start_cpu(cpu))); 185 } while (!(ret = workqueue_stat_start_cpu(cpu)));
177 return ret; 186 return ret;
187 } else {
188 ret = list_entry(prev_cws->list.next,
189 struct cpu_workqueue_stats, list);
190 kref_get(&ret->kref);
178 } 191 }
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 192 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 193
181 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, 194 return ret;
182 list);
183} 195}
184 196
185static int workqueue_stat_show(struct seq_file *s, void *p) 197static int workqueue_stat_show(struct seq_file *s, void *p)
@@ -203,6 +215,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
203 return 0; 215 return 0;
204} 216}
205 217
218static void workqueue_stat_release(void *stat)
219{
220 struct cpu_workqueue_stats *node = stat;
221
222 kref_put(&node->kref, cpu_workqueue_stat_free);
223}
224
206static int workqueue_stat_headers(struct seq_file *s) 225static int workqueue_stat_headers(struct seq_file *s)
207{ 226{
208 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 227 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
@@ -215,6 +234,7 @@ struct tracer_stat workqueue_stats __read_mostly = {
215 .stat_start = workqueue_stat_start, 234 .stat_start = workqueue_stat_start,
216 .stat_next = workqueue_stat_next, 235 .stat_next = workqueue_stat_next,
217 .stat_show = workqueue_stat_show, 236 .stat_show = workqueue_stat_show,
237 .stat_release = workqueue_stat_release,
218 .stat_headers = workqueue_stat_headers 238 .stat_headers = workqueue_stat_headers
219}; 239};
220 240
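
The trace_workqueue.c change above replaces the bare kfree() with kref-based lifetime management so that a stat entry handed out by stat_start/stat_next cannot be freed underneath the iterator: kref_init() on creation, kref_get() whenever the node is returned to the stat code, and kref_put() both on workqueue destruction and from the new stat_release callback. A stripped-down user-space analogue of that pattern is sketched below; C11 atomics stand in for struct kref and the node type is invented:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct node {
		atomic_int refcount;		/* plays the role of struct kref */
		int cpu;
	};

	static void node_free(struct node *n)
	{
		printf("freeing node for cpu %d\n", n->cpu);
		free(n);
	}

	static struct node *node_create(int cpu)
	{
		struct node *n = malloc(sizeof(*n));

		if (!n)
			abort();
		atomic_init(&n->refcount, 1);	/* like kref_init(): creator holds one ref */
		n->cpu = cpu;
		return n;
	}

	static void node_get(struct node *n)	/* like kref_get() */
	{
		atomic_fetch_add(&n->refcount, 1);
	}

	static void node_put(struct node *n)	/* like kref_put(..., node_free) */
	{
		if (atomic_fetch_sub(&n->refcount, 1) == 1)
			node_free(n);
	}

	int main(void)
	{
		struct node *n = node_create(0);

		node_get(n);	/* iterator takes a reference (stat_start/stat_next) */
		node_put(n);	/* list removal drops the creator's reference */
		node_put(n);	/* iterator done: the release callback frees the node */
		return 0;
	}

Whichever side drops the last reference performs the free, so the order of "workqueue destroyed" versus "stat iteration finished" no longer matters.
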