aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-11-05 22:13:05 -0500
committerIngo Molnar <mingo@elte.hu>2009-11-08 04:31:42 -0500
commit444a2a3bcd6d5bed5c823136f68fcc93c0fe283f (patch)
tree6a57308586b4e723238646074e79298845803520 /kernel
parent09879b99d44d701c603935ef2549004405d7f8f9 (diff)
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the lockdep:lock_acquire event, it will infect every other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu event and then submit the buffer to perf. 4) Perf detects a recursion and ignores it 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, it has then been turned into a lock_acquire event of rcu read lock Such scenario also happens with lock_release with rcu_read_unlock(). We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent from other possible kind of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem. -v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Jason Baron <jbaron@redhat.com> LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/trace_event_profile.c41
-rw-r--r--kernel/trace/trace_kprobe.c50
-rw-r--r--kernel/trace/trace_syscalls.c44
3 files changed, 96 insertions, 39 deletions
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index c9f687ab0d4f..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,41 +8,36 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12struct perf_trace_buf *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
19 14
20char *trace_profile_buf_nmi; 15struct perf_trace_buf *perf_trace_buf_nmi;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); 16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
22 17
23/* Count the events in use (per event id, not per instance) */ 18/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 19static int total_profile_count;
25 20
26static int ftrace_profile_enable_event(struct ftrace_event_call *event) 21static int ftrace_profile_enable_event(struct ftrace_event_call *event)
27{ 22{
28 char *buf; 23 struct perf_trace_buf *buf;
29 int ret = -ENOMEM; 24 int ret = -ENOMEM;
30 25
31 if (atomic_inc_return(&event->profile_count)) 26 if (atomic_inc_return(&event->profile_count))
32 return 0; 27 return 0;
33 28
34 if (!total_profile_count) { 29 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 30 buf = alloc_percpu(struct perf_trace_buf);
36 if (!buf) 31 if (!buf)
37 goto fail_buf; 32 goto fail_buf;
38 33
39 rcu_assign_pointer(trace_profile_buf, buf); 34 rcu_assign_pointer(perf_trace_buf, buf);
40 35
41 buf = (char *)alloc_percpu(profile_buf_t); 36 buf = alloc_percpu(struct perf_trace_buf);
42 if (!buf) 37 if (!buf)
43 goto fail_buf_nmi; 38 goto fail_buf_nmi;
44 39
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 40 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 41 }
47 42
48 ret = event->profile_enable(event); 43 ret = event->profile_enable(event);
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 48
54fail_buf_nmi: 49fail_buf_nmi:
55 if (!total_profile_count) { 50 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 51 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 52 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 53 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 54 perf_trace_buf = NULL;
60 } 55 }
61fail_buf: 56fail_buf:
62 atomic_dec(&event->profile_count); 57 atomic_dec(&event->profile_count);
@@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id)
84 79
85static void ftrace_profile_disable_event(struct ftrace_event_call *event) 80static void ftrace_profile_disable_event(struct ftrace_event_call *event)
86{ 81{
87 char *buf, *nmi_buf; 82 struct perf_trace_buf *buf, *nmi_buf;
88 83
89 if (!atomic_add_negative(-1, &event->profile_count)) 84 if (!atomic_add_negative(-1, &event->profile_count))
90 return; 85 return;
@@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
92 event->profile_disable(event); 87 event->profile_disable(event);
93 88
94 if (!--total_profile_count) { 89 if (!--total_profile_count) {
95 buf = trace_profile_buf; 90 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 91 rcu_assign_pointer(perf_trace_buf, NULL);
97 92
98 nmi_buf = trace_profile_buf_nmi; 93 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 94 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 95
101 /* 96 /*
102 * Ensure every events in profiling have finished before 97 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index cf17a6694f32..3696476f307d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
1208 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1208 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1209 struct ftrace_event_call *call = &tp->call; 1209 struct ftrace_event_call *call = &tp->call;
1210 struct kprobe_trace_entry *entry; 1210 struct kprobe_trace_entry *entry;
1211 struct perf_trace_buf *trace_buf;
1211 struct trace_entry *ent; 1212 struct trace_entry *ent;
1212 int size, __size, i, pc, __cpu; 1213 int size, __size, i, pc, __cpu;
1213 unsigned long irq_flags; 1214 unsigned long irq_flags;
@@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
1229 __cpu = smp_processor_id(); 1230 __cpu = smp_processor_id();
1230 1231
1231 if (in_nmi()) 1232 if (in_nmi())
1232 raw_data = rcu_dereference(trace_profile_buf_nmi); 1233 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1233 else 1234 else
1234 raw_data = rcu_dereference(trace_profile_buf); 1235 trace_buf = rcu_dereference(perf_trace_buf);
1235 1236
1236 if (!raw_data) 1237 if (!trace_buf)
1237 goto end; 1238 goto end;
1238 1239
1239 raw_data = per_cpu_ptr(raw_data, __cpu); 1240 trace_buf = per_cpu_ptr(trace_buf, __cpu);
1241
1242 if (trace_buf->recursion++)
1243 goto end_recursion;
1244
1245 /*
1246 * Make recursion update visible before entering perf_tp_event
1247 * so that we protect from perf recursions.
1248 */
1249 barrier();
1250
1251 raw_data = trace_buf->buf;
1252
1240 /* Zero dead bytes from alignment to avoid buffer leak to userspace */ 1253 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1241 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 1254 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1242 entry = (struct kprobe_trace_entry *)raw_data; 1255 entry = (struct kprobe_trace_entry *)raw_data;
@@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
1249 for (i = 0; i < tp->nr_args; i++) 1262 for (i = 0; i < tp->nr_args; i++)
1250 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1263 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1251 perf_tp_event(call->id, entry->ip, 1, entry, size); 1264 perf_tp_event(call->id, entry->ip, 1, entry, size);
1265
1266end_recursion:
1267 trace_buf->recursion--;
1252end: 1268end:
1253 local_irq_restore(irq_flags); 1269 local_irq_restore(irq_flags);
1270
1254 return 0; 1271 return 0;
1255} 1272}
1256 1273
@@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1261 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1278 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1262 struct ftrace_event_call *call = &tp->call; 1279 struct ftrace_event_call *call = &tp->call;
1263 struct kretprobe_trace_entry *entry; 1280 struct kretprobe_trace_entry *entry;
1281 struct perf_trace_buf *trace_buf;
1264 struct trace_entry *ent; 1282 struct trace_entry *ent;
1265 int size, __size, i, pc, __cpu; 1283 int size, __size, i, pc, __cpu;
1266 unsigned long irq_flags; 1284 unsigned long irq_flags;
@@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1282 __cpu = smp_processor_id(); 1300 __cpu = smp_processor_id();
1283 1301
1284 if (in_nmi()) 1302 if (in_nmi())
1285 raw_data = rcu_dereference(trace_profile_buf_nmi); 1303 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1286 else 1304 else
1287 raw_data = rcu_dereference(trace_profile_buf); 1305 trace_buf = rcu_dereference(perf_trace_buf);
1288 1306
1289 if (!raw_data) 1307 if (!trace_buf)
1290 goto end; 1308 goto end;
1291 1309
1292 raw_data = per_cpu_ptr(raw_data, __cpu); 1310 trace_buf = per_cpu_ptr(trace_buf, __cpu);
1311
1312 if (trace_buf->recursion++)
1313 goto end_recursion;
1314
1315 /*
1316 * Make recursion update visible before entering perf_tp_event
1317 * so that we protect from perf recursions.
1318 */
1319 barrier();
1320
1321 raw_data = trace_buf->buf;
1322
1293 /* Zero dead bytes from alignment to avoid buffer leak to userspace */ 1323 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1294 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 1324 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1295 entry = (struct kretprobe_trace_entry *)raw_data; 1325 entry = (struct kretprobe_trace_entry *)raw_data;
@@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1303 for (i = 0; i < tp->nr_args; i++) 1333 for (i = 0; i < tp->nr_args; i++)
1304 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1334 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1305 perf_tp_event(call->id, entry->ret_ip, 1, entry, size); 1335 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1336
1337end_recursion:
1338 trace_buf->recursion--;
1306end: 1339end:
1307 local_irq_restore(irq_flags); 1340 local_irq_restore(irq_flags);
1341
1308 return 0; 1342 return 0;
1309} 1343}
1310 1344
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 58b8e5370767..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
477static void prof_syscall_enter(struct pt_regs *regs, long id) 477static void prof_syscall_enter(struct pt_regs *regs, long id)
478{ 478{
479 struct syscall_metadata *sys_data; 479 struct syscall_metadata *sys_data;
480 struct perf_trace_buf *trace_buf;
480 struct syscall_trace_enter *rec; 481 struct syscall_trace_enter *rec;
481 unsigned long flags; 482 unsigned long flags;
482 char *raw_data; 483 char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
507 cpu = smp_processor_id(); 508 cpu = smp_processor_id();
508 509
509 if (in_nmi()) 510 if (in_nmi())
510 raw_data = rcu_dereference(trace_profile_buf_nmi); 511 trace_buf = rcu_dereference(perf_trace_buf_nmi);
511 else 512 else
512 raw_data = rcu_dereference(trace_profile_buf); 513 trace_buf = rcu_dereference(perf_trace_buf);
513 514
514 if (!raw_data) 515 if (!trace_buf)
515 goto end; 516 goto end;
516 517
517 raw_data = per_cpu_ptr(raw_data, cpu); 518 trace_buf = per_cpu_ptr(trace_buf, cpu);
519
520 if (trace_buf->recursion++)
521 goto end_recursion;
522
523 /*
524 * Make recursion update visible before entering perf_tp_event
525 * so that we protect from perf recursions.
526 */
527 barrier();
528
529 raw_data = trace_buf->buf;
518 530
519 /* zero the dead bytes from align to not leak stack to user */ 531 /* zero the dead bytes from align to not leak stack to user */
520 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 532 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
527 (unsigned long *)&rec->args); 539 (unsigned long *)&rec->args);
528 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 540 perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
529 541
542end_recursion:
543 trace_buf->recursion--;
530end: 544end:
531 local_irq_restore(flags); 545 local_irq_restore(flags);
532} 546}
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
574{ 588{
575 struct syscall_metadata *sys_data; 589 struct syscall_metadata *sys_data;
576 struct syscall_trace_exit *rec; 590 struct syscall_trace_exit *rec;
591 struct perf_trace_buf *trace_buf;
577 unsigned long flags; 592 unsigned long flags;
578 int syscall_nr; 593 int syscall_nr;
579 char *raw_data; 594 char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
605 cpu = smp_processor_id(); 620 cpu = smp_processor_id();
606 621
607 if (in_nmi()) 622 if (in_nmi())
608 raw_data = rcu_dereference(trace_profile_buf_nmi); 623 trace_buf = rcu_dereference(perf_trace_buf_nmi);
609 else 624 else
610 raw_data = rcu_dereference(trace_profile_buf); 625 trace_buf = rcu_dereference(perf_trace_buf);
611 626
612 if (!raw_data) 627 if (!trace_buf)
613 goto end; 628 goto end;
614 629
615 raw_data = per_cpu_ptr(raw_data, cpu); 630 trace_buf = per_cpu_ptr(trace_buf, cpu);
631
632 if (trace_buf->recursion++)
633 goto end_recursion;
634
635 /*
636 * Make recursion update visible before entering perf_tp_event
637 * so that we protect from perf recursions.
638 */
639 barrier();
640
641 raw_data = trace_buf->buf;
616 642
617 /* zero the dead bytes from align to not leak stack to user */ 643 /* zero the dead bytes from align to not leak stack to user */
618 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 644 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
626 652
627 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 653 perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
628 654
655end_recursion:
656 trace_buf->recursion--;
629end: 657end:
630 local_irq_restore(flags); 658 local_irq_restore(flags);
631} 659}