-rw-r--r--  arch/powerpc/kernel/ftrace.c                   2
-rw-r--r--  arch/s390/kernel/ftrace.c                      2
-rw-r--r--  arch/x86/Kconfig                               1
-rw-r--r--  arch/x86/kernel/entry_32.S                     2
-rw-r--r--  arch/x86/kernel/entry_64.S                     2
-rw-r--r--  arch/x86/kernel/ftrace.c                       6
-rw-r--r--  include/linux/ftrace.h                         4
-rw-r--r--  include/linux/trace_seq.h                      2
-rw-r--r--  kernel/trace/Kconfig                           8
-rw-r--r--  kernel/trace/ftrace.c                          7
-rw-r--r--  kernel/trace/kmemtrace.c                       2
-rw-r--r--  kernel/trace/ring_buffer.c                   311
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c          45
-rw-r--r--  kernel/trace/trace.c                           8
-rw-r--r--  kernel/trace/trace_events_filter.c            37
-rw-r--r--  kernel/trace/trace_functions.c                 8
-rw-r--r--  kernel/trace/trace_functions_graph.c          36
-rw-r--r--  samples/trace_events/Makefile                  8
-rw-r--r--  samples/trace_events/trace-events-sample.h    27
19 files changed, 332 insertions, 186 deletions
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 1b12696cca06..ce1f3e44c24f 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -586,7 +586,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 82ddfd3a75af..3e298e64f0db 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -190,7 +190,7 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent)
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
 		goto out;
 	trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN;
 	/* Only trace if the calling function expects to. */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73c0bda73fcd..d1430ef6b4f9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_FTRACE_SYSCALLS
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9f8ce77dbc64..c097e7d607c6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1174,6 +1174,7 @@ ENTRY(ftrace_graph_caller)
 	pushl %edx
 	movl 0xc(%esp), %edx
 	lea 0x4(%ebp), %eax
+	movl (%ebp), %ecx
 	subl $MCOUNT_INSN_SIZE, %edx
 	call prepare_ftrace_return
 	popl %edx
@@ -1188,6 +1189,7 @@ return_to_handler:
 	pushl %eax
 	pushl %ecx
 	pushl %edx
+	movl %ebp, %eax
 	call ftrace_return_to_handler
 	movl %eax, 0xc(%esp)
 	popl %edx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index de74f0a3e0ed..c251be745107 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller)
 
 	leaq 8(%rbp), %rdi
 	movq 0x38(%rsp), %rsi
+	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call prepare_ftrace_return
@@ -150,6 +151,7 @@ GLOBAL(return_to_handler)
 	/* Save the return values */
 	movq %rax, (%rsp)
 	movq %rdx, 8(%rsp)
+	movq %rbp, %rdi
 
 	call ftrace_return_to_handler
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b79c5533c421..d94e1ea3b9fe 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * Hook the return address and push it in the stack of return addrs
  * in current thread info.
  */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
 {
 	unsigned long old;
 	int faulted;
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+				    frame_pointer) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 39b95c56587e..dc3b1328aaeb 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -362,6 +362,7 @@ struct ftrace_ret_stack {
 	unsigned long func;
 	unsigned long long calltime;
 	unsigned long long subtime;
+	unsigned long fp;
 };
 
 /*
@@ -372,7 +373,8 @@ struct ftrace_ret_stack {
 extern void return_to_handler(void);
 
 extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+			 unsigned long frame_pointer);
 
 /*
  * Sometimes we don't want to trace a function with the function
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index c68bccba2074..c134dd1fe6b6 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -3,6 +3,8 @@
 
 #include <linux/fs.h>
 
+#include <asm/page.h>
+
 /*
  * Trace sequences are used to allow a function to call several other functions
  * to create a string of data to use (up to a max of PAGE_SIZE.
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61071fecc82e..1551f47e7669 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
 config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 
+config HAVE_FUNCTION_GRAPH_FP_TEST
+	bool
+	help
+	 An arch may pass in a unique value (frame pointer) to both the
+	 entering and exiting of a function. On exit, the value is compared
+	 and if it does not match, then it will panic the kernel.
+
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
 	help
@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER
 	bool "Kernel Function Graph Tracer"
 	depends on HAVE_FUNCTION_GRAPH_TRACER
 	depends on FUNCTION_TRACER
+	depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
 	default y
 	help
 	  Enable the kernel to trace a function at both its return
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb60732ade0c..3718d55fb4c3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command)
 		return;
 
 	ftrace_start_up--;
+	/*
+	 * Just warn in case of unbalance, no need to kill ftrace, it's not
+	 * critical but the ftrace_call callers may be never nopped again after
+	 * further ftrace uses.
+	 */
+	WARN_ON_ONCE(ftrace_start_up < 0);
+
 	if (!ftrace_start_up)
 		command |= FTRACE_DISABLE_CALLS;
 
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 86cdf671d7e2..1edaa9516e81 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr)
 	int cpu;
 	kmemtrace_array = tr;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		tracing_reset(tr, cpu);
 
 	kmemtrace_start_probes();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..04dac2638258 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT 4U
 #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
 
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu {
 	unsigned long overrun;
 	unsigned long read;
 	local_t entries;
+	local_t committing;
+	local_t commits;
 	u64 write_stamp;
 	u64 read_stamp;
 	atomic_t record_disabled;
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
-/*
- * Causes compile errors if the struct buffer_page gets bigger
- * than the struct page.
- */
-extern int ring_buffer_page_too_big(void);
-
 #ifdef CONFIG_HOTPLUG_CPU
 static int rb_cpu_notify(struct notifier_block *self,
 			 unsigned long action, void *hcpu);
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	int bsize;
 	int cpu;
 
-	/* Paranoid! Optimizes out when all is well */
-	if (sizeof(struct buffer_page) > sizeof(struct page))
-		ring_buffer_page_too_big();
-
-
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
 			 GFP_KERNEL);
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
-	if (buffer->pages == 1)
-		buffer->pages++;
+	if (buffer->pages < 2)
+		buffer->pages = 2;
 
 	/*
 	 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event)
 {
 	unsigned long addr = (unsigned long)event;
 
-	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 
 static inline int
-rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
-	     struct ring_buffer_event *event)
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+		   struct ring_buffer_event *event)
 {
 	unsigned long addr = (unsigned long)event;
 	unsigned long index;
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 }
 
 static void
-rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
-		    struct ring_buffer_event *event)
-{
-	unsigned long addr = (unsigned long)event;
-	unsigned long index;
-
-	index = rb_event_index(event);
-	addr &= PAGE_MASK;
-
-	while (cpu_buffer->commit_page->page != (void *)addr) {
-		if (RB_WARN_ON(cpu_buffer,
-		    cpu_buffer->commit_page == cpu_buffer->tail_page))
-			return;
-		cpu_buffer->commit_page->page->commit =
-			cpu_buffer->commit_page->write;
-		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-		cpu_buffer->write_stamp =
-			cpu_buffer->commit_page->page->time_stamp;
-	}
-
-	/* Now set the commit to the event's index */
-	local_set(&cpu_buffer->commit_page->page->commit, index);
-}
-
-static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	/*
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length)
 	return length;
 }
 
+static inline void
+rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	      struct buffer_page *tail_page,
+	      unsigned long tail, unsigned long length)
+{
+	struct ring_buffer_event *event;
+
+	/*
+	 * Only the event that crossed the page boundary
+	 * must fill the old tail_page with padding.
+	 */
+	if (tail >= BUF_PAGE_SIZE) {
+		local_sub(length, &tail_page->write);
+		return;
+	}
+
+	event = __rb_page_index(tail_page, tail);
+	kmemcheck_annotate_bitfield(event, bitfield);
+
+	/*
+	 * If this event is bigger than the minimum size, then
+	 * we need to be careful that we don't subtract the
+	 * write counter enough to allow another writer to slip
+	 * in on this page.
+	 * We put in a discarded commit instead, to make sure
+	 * that this space is not used again.
+	 *
+	 * If we are less than the minimum size, we don't need to
+	 * worry about it.
+	 */
+	if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
+		/* No room for any events */
+
+		/* Mark the rest of the page with padding */
+		rb_event_set_padding(event);
+
+		/* Set the write back to the previous setting */
+		local_sub(length, &tail_page->write);
+		return;
+	}
+
+	/* Put in a discarded event */
+	event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	event->time_delta = 1;
+	/* Account for this as an entry */
+	local_inc(&tail_page->entries);
+	local_inc(&cpu_buffer->entries);
+
+	/* Set write to end of buffer */
+	length = (tail + length) - BUF_PAGE_SIZE;
+	local_sub(length, &tail_page->write);
+}
 
 static struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct buffer_page *next_page, *head_page, *reader_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
-	struct ring_buffer_event *event;
 	bool lock_taken = false;
 	unsigned long flags;
 
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page->page->time_stamp = *ts;
 	}
 
-	/*
-	 * The actual tail page has moved forward.
-	 */
-	if (tail < BUF_PAGE_SIZE) {
-		/* Mark the rest of the page with padding */
-		event = __rb_page_index(tail_page, tail);
-		kmemcheck_annotate_bitfield(event, bitfield);
-		rb_event_set_padding(event);
-	}
-
-	/* Set the write back to the previous setting */
-	local_sub(length, &tail_page->write);
-
-	/*
-	 * If this was a commit entry that failed,
-	 * increment that too
-	 */
-	if (tail_page == cpu_buffer->commit_page &&
-	    tail == rb_commit_index(cpu_buffer)) {
-		rb_set_commit_to_write(cpu_buffer);
-	}
+	rb_reset_tail(cpu_buffer, tail_page, tail, length);
 
 	__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
  out_reset:
 	/* reset write */
-	local_sub(length, &tail_page->write);
+	rb_reset_tail(cpu_buffer, tail_page, tail, length);
 
 	if (likely(lock_taken))
 		__raw_spin_unlock(&cpu_buffer->lock);
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 	/* We reserved something on the buffer */
 
-	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
-		return NULL;
-
 	event = __rb_page_index(tail_page, tail);
 	kmemcheck_annotate_bitfield(event, bitfield);
 	rb_update_event(event, type, length);
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	local_inc(&tail_page->entries);
 
 	/*
-	 * If this is a commit and the tail is zero, then update
-	 * this page's time stamp.
+	 * If this is the first commit on the page, then update
+	 * its timestamp.
 	 */
-	if (!tail && rb_is_commit(cpu_buffer, event))
-		cpu_buffer->commit_page->page->time_stamp = *ts;
+	if (!tail)
+		tail_page->page->time_stamp = *ts;
 
 	return event;
 }
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return -EAGAIN;
 
 	/* Only a commited time event can update the write stamp */
-	if (rb_is_commit(cpu_buffer, event)) {
+	if (rb_event_is_commit(cpu_buffer, event)) {
 		/*
-		 * If this is the first on the page, then we need to
-		 * update the page itself, and just put in a zero.
+		 * If this is the first on the page, then it was
+		 * updated with the page itself. Try to discard it
+		 * and if we can't just make it zero.
 		 */
 		if (rb_event_index(event)) {
 			event->time_delta = *delta & TS_MASK;
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
-			cpu_buffer->commit_page->page->time_stamp = *ts;
 			/* try to discard, since we do not need this */
 			if (!rb_try_to_discard(cpu_buffer, event)) {
 				/* nope, just zero it */
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 	return ret;
 }
 
+static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	local_inc(&cpu_buffer->committing);
+	local_inc(&cpu_buffer->commits);
+}
+
+static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	unsigned long commits;
+
+	if (RB_WARN_ON(cpu_buffer,
+		       !local_read(&cpu_buffer->committing)))
+		return;
+
+ again:
+	commits = local_read(&cpu_buffer->commits);
+	/* synchronize with interrupts */
+	barrier();
+	if (local_read(&cpu_buffer->committing) == 1)
+		rb_set_commit_to_write(cpu_buffer);
+
+	local_dec(&cpu_buffer->committing);
+
+	/* synchronize with interrupts */
+	barrier();
+
+	/*
+	 * Need to account for interrupts coming in between the
+	 * updating of the commit page and the clearing of the
+	 * committing counter.
+	 */
+	if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
+	    !local_read(&cpu_buffer->committing)) {
+		local_inc(&cpu_buffer->committing);
+		goto again;
+	}
+}
+
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		      unsigned long length)
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	int commit = 0;
 	int nr_loops = 0;
 
+	rb_start_commit(cpu_buffer);
+
 	length = rb_calculate_event_length(length);
  again:
 	/*
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * Bail!
 	 */
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
-		return NULL;
+		goto out_fail;
 
 	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 		commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
 		if (commit == -EBUSY)
-			return NULL;
+			goto out_fail;
 
 		if (commit == -EAGAIN)
 			goto again;
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
-	if (!event) {
-		if (unlikely(commit))
-			/*
-			 * Ouch! We needed a timestamp and it was commited. But
-			 * we didn't get our event reserved.
-			 */
-			rb_set_commit_to_write(cpu_buffer);
-		return NULL;
-	}
+	if (!event)
+		goto out_fail;
 
-	/*
-	 * If the timestamp was commited, make the commit our entry
-	 * now so that we will update it when needed.
-	 */
-	if (unlikely(commit))
-		rb_set_commit_event(cpu_buffer, event);
-	else if (!rb_is_commit(cpu_buffer, event))
+	if (!rb_event_is_commit(cpu_buffer, event))
 		delta = 0;
 
 	event->time_delta = delta;
 
 	return event;
+
+ out_fail:
+	rb_end_commit(cpu_buffer);
+	return NULL;
 }
 
 #define TRACE_RECURSIVE_DEPTH 16
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	local_inc(&cpu_buffer->entries);
 
-	/* Only process further if we own the commit */
-	if (!rb_is_commit(cpu_buffer, event))
-		return;
-
-	cpu_buffer->write_stamp += event->time_delta;
+	/*
+	 * The event first in the commit queue updates the
+	 * time stamp.
+	 */
+	if (rb_event_is_commit(cpu_buffer, event))
+		cpu_buffer->write_stamp += event->time_delta;
 
-	rb_set_commit_to_write(cpu_buffer);
+	rb_end_commit(cpu_buffer);
 }
 
 /**
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	/* The event is discarded regardless */
 	rb_event_discard(event);
 
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
 	/*
 	 * This must only be called if the event has not been
 	 * committed yet. Thus we can assume that preemption
 	 * is still disabled.
 	 */
-	RB_WARN_ON(buffer, preemptible());
-
-	cpu = smp_processor_id();
-	cpu_buffer = buffer->buffers[cpu];
+	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
 
 	if (!rb_try_to_discard(cpu_buffer, event))
 		goto out;
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 */
 	local_inc(&cpu_buffer->entries);
  out:
-	/*
-	 * If a write came in and pushed the tail page
-	 * we still need to update the commit pointer
-	 * if we were the commit.
-	 */
-	if (rb_is_commit(cpu_buffer, event))
-		rb_set_commit_to_write(cpu_buffer);
+	rb_end_commit(cpu_buffer);
 
 	trace_recursive_unlock();
 
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
+static inline int rb_ok_to_lock(void)
+{
+	/*
+	 * If an NMI die dumps out the content of the ring buffer
+	 * do not grab locks. We also permanently disable the ring
+	 * buffer too. A one time deal is all you get from reading
+	 * the ring buffer from an NMI.
+	 */
+	if (likely(!in_nmi() && !oops_in_progress))
+		return 1;
+
+	tracing_off_permanent();
+	return 0;
+}
+
 /**
  * ring_buffer_peek - peek at the next event to be read
  * @buffer: The ring buffer to read
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	unsigned long flags;
+	int dolock;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
+	dolock = rb_ok_to_lock();
  again:
-	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 	event = rb_buffer_peek(buffer, cpu, ts);
-	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
 	unsigned long flags;
+	int dolock;
+
+	dolock = rb_ok_to_lock();
 
  again:
 	/* might be called in atomic */
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 
 	event = rb_buffer_peek(buffer, cpu, ts);
 	if (!event)
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	rb_advance_reader(cpu_buffer);
 
  out_unlock:
-	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
  out:
 	preempt_enable();
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->overrun = 0;
 	cpu_buffer->read = 0;
 	local_set(&cpu_buffer->entries, 0);
+	local_set(&cpu_buffer->committing, 0);
+	local_set(&cpu_buffer->commits, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
 int ring_buffer_empty(struct ring_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+	int dolock;
 	int cpu;
+	int ret;
+
+	dolock = rb_ok_to_lock();
 
 	/* yes this is racy, but if you don't like the race, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		if (!rb_per_cpu_empty(cpu_buffer))
+		local_irq_save(flags);
+		if (dolock)
+			spin_lock(&cpu_buffer->reader_lock);
+		ret = rb_per_cpu_empty(cpu_buffer);
+		if (dolock)
+			spin_unlock(&cpu_buffer->reader_lock);
+		local_irq_restore(flags);
+
+		if (!ret)
 			return 0;
 	}
 
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+	int dolock;
 	int ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
+	dolock = rb_ok_to_lock();
+
 	cpu_buffer = buffer->buffers[cpu];
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 	ret = rb_per_cpu_empty(cpu_buffer);
-
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
 	return ret;
 }
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (cpu_isset(cpu, *buffer->cpumask))
+		if (cpumask_test_cpu(cpu, buffer->cpumask))
 			return NOTIFY_OK;
 
 		buffer->buffers[cpu] =
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 			return NOTIFY_OK;
 		}
 		smp_wmb();
-		cpu_set(cpu, *buffer->cpumask);
+		cpumask_set_cpu(cpu, buffer->cpumask);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 8d68e149a8b3..573d3cc762c3 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu)
 		event = (void *)&rpage->data[i];
 		switch (event->type_len) {
 		case RINGBUF_TYPE_PADDING:
-			/* We don't expect any padding */
-			KILL_TEST();
+			/* failed writes may be discarded events */
+			if (!event->time_delta)
+				KILL_TEST();
+			inc = event->array[0] + 4;
 			break;
 		case RINGBUF_TYPE_TIME_EXTEND:
 			inc = 8;
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu)
 				KILL_TEST();
 				break;
 			}
-			inc = event->array[0];
+			inc = event->array[0] + 4;
 			break;
 		default:
 			entry = ring_buffer_event_data(event);
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void)
 	 * Hammer the buffer for 10 secs (this may
 	 * make the system stall)
 	 */
-	pr_info("Starting ring buffer hammer\n");
+	trace_printk("Starting ring buffer hammer\n");
 	do_gettimeofday(&start_tv);
 	do {
 		struct ring_buffer_event *event;
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void)
 #endif
 
 	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
-	pr_info("End ring buffer hammer\n");
+	trace_printk("End ring buffer hammer\n");
 
 	if (consumer) {
 		/* Init both completions here to avoid races */
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void)
 	overruns = ring_buffer_overruns(buffer);
 
 	if (kill_test)
-		pr_info("ERROR!\n");
-	pr_info("Time: %lld (usecs)\n", time);
-	pr_info("Overruns: %lld\n", overruns);
+		trace_printk("ERROR!\n");
+	trace_printk("Time: %lld (usecs)\n", time);
+	trace_printk("Overruns: %lld\n", overruns);
 	if (disable_reader)
-		pr_info("Read: (reader disabled)\n");
+		trace_printk("Read: (reader disabled)\n");
 	else
-		pr_info("Read: %ld (by %s)\n", read,
+		trace_printk("Read: %ld (by %s)\n", read,
 			read_events ? "events" : "pages");
-	pr_info("Entries: %lld\n", entries);
-	pr_info("Total: %lld\n", entries + overruns + read);
-	pr_info("Missed: %ld\n", missed);
-	pr_info("Hit: %ld\n", hit);
+	trace_printk("Entries: %lld\n", entries);
+	trace_printk("Total: %lld\n", entries + overruns + read);
+	trace_printk("Missed: %ld\n", missed);
+	trace_printk("Hit: %ld\n", hit);
 
 	/* Convert time from usecs to millisecs */
 	do_div(time, USEC_PER_MSEC);
 	if (time)
 		hit /= (long)time;
 	else
-		pr_info("TIME IS ZERO??\n");
+		trace_printk("TIME IS ZERO??\n");
 
-	pr_info("Entries per millisec: %ld\n", hit);
+	trace_printk("Entries per millisec: %ld\n", hit);
 
 	if (hit) {
 		/* Calculate the average time in nanosecs */
 		avg = NSEC_PER_MSEC / hit;
-		pr_info("%ld ns per entry\n", avg);
+		trace_printk("%ld ns per entry\n", avg);
 	}
 
 	if (missed) {
 		if (time)
 			missed /= (long)time;
 
-		pr_info("Total iterations per millisec: %ld\n", hit + missed);
+		trace_printk("Total iterations per millisec: %ld\n",
+			     hit + missed);
 
 		/* it is possible that hit + missed will overflow and be zero */
 		if (!(hit + missed)) {
-			pr_info("hit + missed overflowed and totalled zero!\n");
+			trace_printk("hit + missed overflowed and totalled zero!\n");
 			hit--; /* make it non zero */
 		}
 
 		/* Caculate the average time in nanosecs */
 		avg = NSEC_PER_MSEC / (hit + missed);
-		pr_info("%ld ns per entry\n", avg);
+		trace_printk("%ld ns per entry\n", avg);
 	}
 }
 
308 311
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg)
 
 	ring_buffer_producer();
 
-	pr_info("Sleeping for 10 secs\n");
+	trace_printk("Sleeping for 10 secs\n");
 	set_current_state(TASK_INTERRUPTIBLE);
 	schedule_timeout(HZ * SLEEP_TIME);
 	__set_current_state(TASK_RUNNING);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1878bfb2e1e..076fa6f0ee48 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
 		return -ENOMEM;
 
-	mutex_lock(&tracing_cpumask_update_lock);
 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
+	mutex_lock(&tracing_cpumask_update_lock);
+
 	local_irq_disable();
 	__raw_spin_lock(&ftrace_max_lock);
 	for_each_tracing_cpu(cpu) {
@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	return count;
 
 err_unlock:
-	mutex_unlock(&tracing_cpumask_update_lock);
-	free_cpumask_var(tracing_cpumask);
+	free_cpumask_var(tracing_cpumask_new);
 
 	return err;
 }
@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	struct trace_seq *s;
 	unsigned long cnt;
 
-	s = kmalloc(sizeof(*s), GFP_ATOMIC);
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return ENOMEM;
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index db6e54bdb596..936c621bbf46 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,8 +27,6 @@
 #include "trace.h"
 #include "trace_output.h"
 
-static DEFINE_MUTEX(filter_mutex);
-
 enum filter_op_ids
 {
 	OP_OR,
@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
 static int filter_pred_strloc(struct filter_pred *pred, void *event,
 			      int val1, int val2)
 {
-	int str_loc = *(int *)(event + pred->offset);
+	unsigned short str_loc = *(unsigned short *)(event + pred->offset);
 	char *addr = (char *)(event + str_loc);
 	int cmp, match;
 
@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	struct event_filter *filter = call->filter;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 	if (filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
 		trace_seq_printf(s, "none\n");
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 }
 
 void print_subsystem_event_filter(struct event_subsystem *system,
@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system,
 {
 	struct event_filter *filter = system->filter;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 	if (filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
 		trace_seq_printf(s, "none\n");
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 }
 
 static struct ftrace_event_field *
@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call)
 			filter_free_pred(filter->preds[i]);
 	}
 	kfree(filter->preds);
+	kfree(filter->filter_string);
 	kfree(filter);
 	call->filter = NULL;
 }
@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 		filter->n_preds = 0;
 	}
 
-	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 			remove_filter_string(call->filter);
 		}
 	}
-	mutex_unlock(&event_mutex);
 }
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 	filter_pred_fn_t fn;
 	unsigned long long val;
 	int string_type;
+	int ret;
 
 	pred->fn = filter_pred_none;
 
@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps,
 		pred->not = 1;
 		return filter_add_pred_fn(ps, call, pred, fn);
 	} else {
-		if (strict_strtoull(pred->str_val, 0, &val)) {
+		if (field->is_signed)
+			ret = strict_strtoll(pred->str_val, 0, &val);
+		else
+			ret = strict_strtoull(pred->str_val, 0, &val);
+		if (ret) {
 			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
 			return -EINVAL;
 		}
@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 	filter->preds[filter->n_preds] = pred;
 	filter->n_preds++;
 
-	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (!call->define_fields)
@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 
 		err = filter_add_pred(ps, call, pred);
 		if (err) {
-			mutex_unlock(&event_mutex);
 			filter_free_subsystem_preds(system);
 			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 			goto out;
 		}
 		replace_filter_string(call->filter, filter_string);
 	}
-	mutex_unlock(&event_mutex);
 out:
 	return err;
 }
@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 
 	struct filter_parse_state *ps;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 
 	if (!strcmp(strstrip(filter_string), "0")) {
 		filter_disable_preds(call);
 		remove_filter_string(call->filter);
-		mutex_unlock(&filter_mutex);
+		mutex_unlock(&event_mutex);
 		return 0;
 	}
 
@@ -1103,7 +1102,7 @@ out:
 	postfix_clear(ps);
 	kfree(ps);
 out_unlock:
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 
 	return err;
 }
@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 
 	struct filter_parse_state *ps;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 
 	if (!strcmp(strstrip(filter_string), "0")) {
 		filter_free_subsystem_preds(system);
 		remove_filter_string(system->filter);
-		mutex_unlock(&filter_mutex);
+		mutex_unlock(&event_mutex);
 		return 0;
 	}
 
@@ -1148,7 +1147,7 @@ out:
 	postfix_clear(ps);
 	kfree(ps);
 out_unlock:
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 
 	return err;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c9a0b7df44ff..90f134764837 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void)
 static void tracing_stop_function_trace(void)
 {
 	ftrace_function_enabled = 0;
-	/* OK if they are not registered */
-	unregister_ftrace_function(&trace_stack_ops);
-	unregister_ftrace_function(&trace_ops);
+
+	if (func_flags.val & TRACE_FUNC_OPT_STACK)
+		unregister_ftrace_function(&trace_stack_ops);
+	else
+		unregister_ftrace_function(&trace_ops);
 }
 
 static int func_set_flag(u32 old_flags, u32 bit, int set)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b592418d8b2..d2249abafb53 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = {
 
 /* Add a function return address to the trace stack on thread info.*/
 int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+			 unsigned long frame_pointer)
 {
 	unsigned long long calltime;
 	int index;
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
 	current->ret_stack[index].subtime = 0;
+	current->ret_stack[index].fp = frame_pointer;
 	*depth = index;
 
 	return 0;
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+			unsigned long frame_pointer)
 {
 	int index;
 
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 		return;
 	}
 
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+	/*
+	 * The arch may choose to record the frame pointer used
+	 * and check it here to make sure that it is what we expect it
+	 * to be. If gcc does not set the place holder of the return
+	 * address in the frame pointer, and does a copy instead, then
+	 * the function graph trace will fail. This test detects this
+	 * case.
+	 *
+	 * Currently, x86_32 with optimize for size (-Os) makes the latest
+	 * gcc do the above.
+	 */
+	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+		ftrace_graph_stop();
+		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
+		     " from func %pF return to %lx\n",
+		     current->ret_stack[index].fp,
+		     frame_pointer,
+		     (void *)current->ret_stack[index].func,
+		     current->ret_stack[index].ret);
+		*ret = (unsigned long)panic;
+		return;
+	}
+#endif
+
 	*ret = current->ret_stack[index].ret;
 	trace->func = current->ret_stack[index].func;
 	trace->calltime = current->ret_stack[index].calltime;
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
  * Send the trace to the ring-buffer.
  * @return the original return address.
  */
-unsigned long ftrace_return_to_handler(void)
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 {
 	struct ftrace_graph_ret trace;
 	unsigned long ret;
 
-	ftrace_pop_return_trace(&trace, &ret);
+	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
 	barrier();
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
index 0d428dc67283..0f8d92120c4e 100644
--- a/samples/trace_events/Makefile
+++ b/samples/trace_events/Makefile
@@ -1,6 +1,14 @@
 # builds the trace events example kernel modules;
 # then to use one (as root): insmod <module_name.ko>
 
+# If you include a trace header outside of include/trace/events
+# then the file that does the #define CREATE_TRACE_POINTS must
+# have that tracer file in its main search path. This is because
+# define_trace.h will include it, and must be able to find it from
+# the include/trace directory.
+#
+# Here trace-events-sample.c does the CREATE_TRACE_POINTS.
+#
 CFLAGS_trace-events-sample.o := -I$(src)
 
 obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 128a897687c5..9977a756fb32 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -19,16 +19,21 @@
  * If TRACE_SYSTEM is defined, that will be the directory created
  * in the ftrace directory under /debugfs/tracing/events/<system>
  *
- * The define_trace.h belowe will also look for a file name of
+ * The define_trace.h below will also look for a file name of
  * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ * In this case, it would look for sample.h
  *
- * If you want a different system than file name, you can override
- * the header name by defining TRACE_INCLUDE_FILE
+ * If the header name will be different than the system name
+ * (as in this case), then you can override the header name that
+ * define_trace.h will look up by defining TRACE_INCLUDE_FILE
  *
- * If this file was called, goofy.h, then we would define:
+ * This file is called trace-events-sample.h but we want the system
+ * to be called "sample". Therefore we must define the name of this
+ * file:
  *
- * #define TRACE_INCLUDE_FILE goofy
+ * #define TRACE_INCLUDE_FILE trace-events-sample
  *
+ * As we do an the bottom of this file.
  */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sample
@@ -99,13 +104,13 @@ TRACE_EVENT(foo_bar,
  *
  * #define TRACE_INCLUDE_PATH ../../samples/trace_events
  *
- * But I chose to simply make it use the current directory and then in
- * the Makefile I added:
+ * But the safest and easiest way to simply make it use the directory
+ * that the file is in is to add in the Makefile:
  *
- * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+ * CFLAGS_trace-events-sample.o := -I$(src)
  *
  * This will make sure the current path is part of the include
- * structure for our file so that we can find it.
+ * structure for our file so that define_trace.h can find it.
  *
  * I could have made only the top level directory the include:
  *
@@ -115,8 +120,8 @@ TRACE_EVENT(foo_bar,
  *
  * #define TRACE_INCLUDE_PATH samples/trace_events
  *
- * But then if something defines "samples" or "trace_events" then we
- * could risk that being converted too, and give us an unexpected
+ * But then if something defines "samples" or "trace_events" as a macro
+ * then we could risk that being converted too, and give us an unexpected
  * result.
  */
 #undef TRACE_INCLUDE_PATH