aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace_syscalls.c
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-11-05 22:13:05 -0500
committerIngo Molnar <mingo@elte.hu>2009-11-08 04:31:42 -0500
commit444a2a3bcd6d5bed5c823136f68fcc93c0fe283f (patch)
tree6a57308586b4e723238646074e79298845803520 /kernel/trace/trace_syscalls.c
parent09879b99d44d701c603935ef2549004405d7f8f9 (diff)
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the lockdep:lock_acquire event, it will infect every other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu event and then submit the buffer to perf. 4) Perf detects a recursion and ignores it 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, it has then been turned into a lock_acquire event of rcu read lock Such scenario also happens with lock_release with rcu_read_unlock(). We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent from other possible kind of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem. -v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Jason Baron <jbaron@redhat.com> LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace/trace_syscalls.c')
-rw-r--r--kernel/trace/trace_syscalls.c44
1 file changed, 36 insertions, 8 deletions
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 58b8e5370767..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
477static void prof_syscall_enter(struct pt_regs *regs, long id) 477static void prof_syscall_enter(struct pt_regs *regs, long id)
478{ 478{
479 struct syscall_metadata *sys_data; 479 struct syscall_metadata *sys_data;
480 struct perf_trace_buf *trace_buf;
480 struct syscall_trace_enter *rec; 481 struct syscall_trace_enter *rec;
481 unsigned long flags; 482 unsigned long flags;
482 char *raw_data; 483 char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
507 cpu = smp_processor_id(); 508 cpu = smp_processor_id();
508 509
509 if (in_nmi()) 510 if (in_nmi())
510 raw_data = rcu_dereference(trace_profile_buf_nmi); 511 trace_buf = rcu_dereference(perf_trace_buf_nmi);
511 else 512 else
512 raw_data = rcu_dereference(trace_profile_buf); 513 trace_buf = rcu_dereference(perf_trace_buf);
513 514
514 if (!raw_data) 515 if (!trace_buf)
515 goto end; 516 goto end;
516 517
517 raw_data = per_cpu_ptr(raw_data, cpu); 518 trace_buf = per_cpu_ptr(trace_buf, cpu);
519
520 if (trace_buf->recursion++)
521 goto end_recursion;
522
523 /*
524 * Make recursion update visible before entering perf_tp_event
525 * so that we protect from perf recursions.
526 */
527 barrier();
528
529 raw_data = trace_buf->buf;
518 530
519 /* zero the dead bytes from align to not leak stack to user */ 531 /* zero the dead bytes from align to not leak stack to user */
520 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 532 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
527 (unsigned long *)&rec->args); 539 (unsigned long *)&rec->args);
528 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 540 perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
529 541
542end_recursion:
543 trace_buf->recursion--;
530end: 544end:
531 local_irq_restore(flags); 545 local_irq_restore(flags);
532} 546}
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
574{ 588{
575 struct syscall_metadata *sys_data; 589 struct syscall_metadata *sys_data;
576 struct syscall_trace_exit *rec; 590 struct syscall_trace_exit *rec;
591 struct perf_trace_buf *trace_buf;
577 unsigned long flags; 592 unsigned long flags;
578 int syscall_nr; 593 int syscall_nr;
579 char *raw_data; 594 char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
605 cpu = smp_processor_id(); 620 cpu = smp_processor_id();
606 621
607 if (in_nmi()) 622 if (in_nmi())
608 raw_data = rcu_dereference(trace_profile_buf_nmi); 623 trace_buf = rcu_dereference(perf_trace_buf_nmi);
609 else 624 else
610 raw_data = rcu_dereference(trace_profile_buf); 625 trace_buf = rcu_dereference(perf_trace_buf);
611 626
612 if (!raw_data) 627 if (!trace_buf)
613 goto end; 628 goto end;
614 629
615 raw_data = per_cpu_ptr(raw_data, cpu); 630 trace_buf = per_cpu_ptr(trace_buf, cpu);
631
632 if (trace_buf->recursion++)
633 goto end_recursion;
634
635 /*
636 * Make recursion update visible before entering perf_tp_event
637 * so that we protect from perf recursions.
638 */
639 barrier();
640
641 raw_data = trace_buf->buf;
616 642
617 /* zero the dead bytes from align to not leak stack to user */ 643 /* zero the dead bytes from align to not leak stack to user */
618 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 644 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
626 652
627 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 653 perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
628 654
655end_recursion:
656 trace_buf->recursion--;
629end: 657end:
630 local_irq_restore(flags); 658 local_irq_restore(flags);
631} 659}