-rw-r--r--  include/linux/ftrace_event.h        |  9
-rw-r--r--  include/trace/ftrace.h              | 39
-rw-r--r--  kernel/trace/trace_event_profile.c  | 41
-rw-r--r--  kernel/trace/trace_kprobe.c         | 50
-rw-r--r--  kernel/trace/trace_syscalls.c       | 44
5 files changed, 133 insertions, 50 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f7b47c336703..43360c1d8f70 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,8 +137,13 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE 2048
 
-extern char *trace_profile_buf;
-extern char *trace_profile_buf_nmi;
+struct perf_trace_buf {
+        char buf[FTRACE_MAX_PROFILE_SIZE];
+        int recursion;
+};
+
+extern struct perf_trace_buf *perf_trace_buf;
+extern struct perf_trace_buf *perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED 32
 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
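
The header change above is the core of the patch: the bare per-cpu profiling buffer becomes a struct that pairs the buffer with a recursion counter, so a handler that interrupts another handler on the same CPU can detect that and back off instead of scribbling over an in-flight record. Below is a minimal userspace sketch of that guard pattern; the names (demo_trace_buf, demo_event) are illustrative only, not kernel API, and a single static buffer stands in for the per-cpu allocation.

#include <stdio.h>

#define DEMO_BUF_SIZE 2048

struct demo_trace_buf {
        char buf[DEMO_BUF_SIZE];
        int recursion;
};

static struct demo_trace_buf demo_buf;  /* one instance per CPU in the kernel */

static void demo_event(const char *msg, int depth)
{
        /* A non-zero counter means this buffer is already in use by a
         * handler we interrupted: bail out rather than corrupt it. */
        if (demo_buf.recursion++)
                goto out;

        snprintf(demo_buf.buf, DEMO_BUF_SIZE, "event: %s", msg);
        printf("recorded \"%s\" at depth %d\n", demo_buf.buf, depth);

        if (depth == 0)
                demo_event("nested", 1);  /* simulate a re-entrant event */
out:
        demo_buf.recursion--;
}

int main(void)
{
        demo_event("outer", 0);  /* the nested event is silently dropped */
        return 0;
}

Running this prints only the outer event: the nested call sees recursion already held and drops itself, which is exactly the trade the patch makes (lose a recursive event, keep the buffer intact).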
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a7f946094128..4945d1c99864 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  * struct ftrace_event_call *event_call = &event_<call>;
  * extern void perf_tp_event(int, u64, u64, void *, int);
  * struct ftrace_raw_##call *entry;
+ * struct perf_trace_buf *trace_buf;
  * u64 __addr = 0, __count = 1;
  * unsigned long irq_flags;
  * struct trace_entry *ent;
@@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  * __cpu = smp_processor_id();
  *
  * if (in_nmi())
- *         raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *         trace_buf = rcu_dereference(perf_trace_buf_nmi);
  * else
- *         raw_data = rcu_dereference(trace_profile_buf);
+ *         trace_buf = rcu_dereference(perf_trace_buf);
  *
- * if (!raw_data)
+ * if (!trace_buf)
  *         goto end;
  *
- * raw_data = per_cpu_ptr(raw_data, __cpu);
+ * trace_buf = per_cpu_ptr(trace_buf, __cpu);
+ *
+ * // Avoid recursion from perf that could mess up the buffer
+ * if (trace_buf->recursion++)
+ *         goto end_recursion;
+ *
+ * raw_data = trace_buf->buf;
+ *
+ * // Make recursion update visible before entering perf_tp_event
+ * // so that we protect from perf recursions.
+ *
+ * barrier();
  *
  * //zero dead bytes from alignment to avoid stack leak to userspace:
  * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
@@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \
 { \
         struct ftrace_data_offsets_##call __maybe_unused __data_offsets; \
         struct ftrace_event_call *event_call = &event_##call; \
         extern void perf_tp_event(int, u64, u64, void *, int); \
         struct ftrace_raw_##call *entry; \
+        struct perf_trace_buf *trace_buf; \
         u64 __addr = 0, __count = 1; \
         unsigned long irq_flags; \
         struct trace_entry *ent; \
@@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \
         __cpu = smp_processor_id(); \
 \
         if (in_nmi()) \
-                raw_data = rcu_dereference(trace_profile_buf_nmi); \
+                trace_buf = rcu_dereference(perf_trace_buf_nmi); \
         else \
-                raw_data = rcu_dereference(trace_profile_buf); \
+                trace_buf = rcu_dereference(perf_trace_buf); \
 \
-        if (!raw_data) \
+        if (!trace_buf) \
                 goto end; \
 \
-        raw_data = per_cpu_ptr(raw_data, __cpu); \
+        trace_buf = per_cpu_ptr(trace_buf, __cpu); \
+        if (trace_buf->recursion++) \
+                goto end_recursion; \
+ \
+        barrier(); \
+ \
+        raw_data = trace_buf->buf; \
 \
         *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
         entry = (struct ftrace_raw_##call *)raw_data; \
@@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \
         perf_tp_event(event_call->id, __addr, __count, entry, \
                        __entry_size); \
 \
+end_recursion: \
+        trace_buf->recursion--; \
 end: \
         local_irq_restore(irq_flags); \
 \
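
Note that the barrier() placed between taking the guard and running the event body is a compiler barrier only: the counter is per-CPU and is only contended with interrupts and NMIs on that same CPU, so no hardware memory ordering is needed, just a guarantee that the compiler does not sink the recursion store past the code it is supposed to protect. A rough userspace equivalent follows, assuming GCC-style inline asm; the macro mirrors how the kernel defines barrier() for GCC, while demo_handler and its globals are made up for the example.

#include <stdio.h>

/* GCC-style compiler barrier: emits no instruction, but the "memory"
 * clobber forbids the optimizer from moving memory accesses across it. */
#define compiler_barrier() __asm__ __volatile__("" : : : "memory")

static int recursion;
static char buf[2048];

static int demo_handler(const char *payload)
{
        if (recursion++)
                goto out;

        /* Keep the recursion update ordered (in the compiled code)
         * before the body, so re-entrant code sees the guard as taken. */
        compiler_barrier();

        snprintf(buf, sizeof(buf), "%s", payload);
        printf("handled: %s\n", buf);
out:
        recursion--;
        return 0;
}

int main(void)
{
        return demo_handler("hello");
}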
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index c9f687ab0d4f..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,41 +8,36 @@
 #include <linux/module.h>
 #include "trace.h"
 
-/*
- * We can't use a size but a type in alloc_percpu()
- * So let's create a dummy type that matches the desired size
- */
-typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
 
-char *trace_profile_buf;
-EXPORT_SYMBOL_GPL(trace_profile_buf);
+struct perf_trace_buf *perf_trace_buf;
+EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-char *trace_profile_buf_nmi;
-EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+struct perf_trace_buf *perf_trace_buf_nmi;
+EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
 
 /* Count the events in use (per event id, not per instance) */
 static int total_profile_count;
 
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
-        char *buf;
+        struct perf_trace_buf *buf;
         int ret = -ENOMEM;
 
         if (atomic_inc_return(&event->profile_count))
                 return 0;
 
         if (!total_profile_count) {
-                buf = (char *)alloc_percpu(profile_buf_t);
+                buf = alloc_percpu(struct perf_trace_buf);
                 if (!buf)
                         goto fail_buf;
 
-                rcu_assign_pointer(trace_profile_buf, buf);
+                rcu_assign_pointer(perf_trace_buf, buf);
 
-                buf = (char *)alloc_percpu(profile_buf_t);
+                buf = alloc_percpu(struct perf_trace_buf);
                 if (!buf)
                         goto fail_buf_nmi;
 
-                rcu_assign_pointer(trace_profile_buf_nmi, buf);
         }
 
         ret = event->profile_enable(event);
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 
 fail_buf_nmi:
         if (!total_profile_count) {
-                free_percpu(trace_profile_buf_nmi);
-                free_percpu(trace_profile_buf);
-                trace_profile_buf_nmi = NULL;
-                trace_profile_buf = NULL;
+                free_percpu(perf_trace_buf_nmi);
+                free_percpu(perf_trace_buf);
+                perf_trace_buf_nmi = NULL;
+                perf_trace_buf = NULL;
         }
 fail_buf:
         atomic_dec(&event->profile_count);
@@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
-        char *buf, *nmi_buf;
+        struct perf_trace_buf *buf, *nmi_buf;
 
         if (!atomic_add_negative(-1, &event->profile_count))
                 return;
@@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
         event->profile_disable(event);
 
         if (!--total_profile_count) {
-                buf = trace_profile_buf;
-                rcu_assign_pointer(trace_profile_buf, NULL);
+                buf = perf_trace_buf;
+                rcu_assign_pointer(perf_trace_buf, NULL);
 
-                nmi_buf = trace_profile_buf_nmi;
-                rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+                nmi_buf = perf_trace_buf_nmi;
+                rcu_assign_pointer(perf_trace_buf_nmi, NULL);
 
                 /*
                  * Ensure every events in profiling have finished before
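
trace_event_profile.c also gets simpler: with a real struct type available, alloc_percpu() can be used directly and the dummy profile_buf_t typedef goes away. The surrounding logic is unchanged: the first enabled event allocates both per-cpu buffers and publishes them with rcu_assign_pointer(), and the last disabled event unpublishes and frees them. Here is a single-threaded sketch of that first-user-allocates / last-user-frees accounting, assuming the per-event profile_count starts at -1 (which is what the atomic_inc_return()/atomic_add_negative() tests imply); plain ints and calloc stand in for the kernel's atomics, RCU, and per-cpu allocator.

#include <stdio.h>
#include <stdlib.h>

struct demo_buf { char buf[2048]; int recursion; };

static struct demo_buf *demo_trace_buf;   /* shared, lazily allocated */
static int total_count;                   /* events currently enabled */

struct demo_event { int profile_count; }; /* starts at -1, per the tests above */

static int demo_enable(struct demo_event *ev)
{
        if (++ev->profile_count)          /* already enabled: just a ref */
                return 0;

        if (!total_count) {               /* first user allocates */
                demo_trace_buf = calloc(1, sizeof(*demo_trace_buf));
                if (!demo_trace_buf) {
                        ev->profile_count--;
                        return -1;
                }
        }
        total_count++;
        return 0;
}

static void demo_disable(struct demo_event *ev)
{
        if (--ev->profile_count >= 0)     /* still enabled elsewhere */
                return;

        if (!--total_count) {             /* last user frees */
                free(demo_trace_buf);
                demo_trace_buf = NULL;
        }
}

int main(void)
{
        struct demo_event ev = { .profile_count = -1 };

        demo_enable(&ev);   /* allocates the shared buffer */
        demo_enable(&ev);   /* refcount only */
        demo_disable(&ev);  /* refcount only */
        demo_disable(&ev);  /* frees the shared buffer */
        printf("buffer now %p\n", (void *)demo_trace_buf);
        return 0;
}

In the real code the teardown additionally waits for in-flight handlers (the "Ensure every events in profiling have finished" comment above) before free_percpu(), since readers reach the buffers through rcu_dereference().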
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index cf17a6694f32..3696476f307d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
         struct ftrace_event_call *call = &tp->call;
         struct kprobe_trace_entry *entry;
+        struct perf_trace_buf *trace_buf;
         struct trace_entry *ent;
         int size, __size, i, pc, __cpu;
         unsigned long irq_flags;
@@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
         __cpu = smp_processor_id();
 
         if (in_nmi())
-                raw_data = rcu_dereference(trace_profile_buf_nmi);
+                trace_buf = rcu_dereference(perf_trace_buf_nmi);
         else
-                raw_data = rcu_dereference(trace_profile_buf);
+                trace_buf = rcu_dereference(perf_trace_buf);
 
-        if (!raw_data)
+        if (!trace_buf)
                 goto end;
 
-        raw_data = per_cpu_ptr(raw_data, __cpu);
+        trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+        if (trace_buf->recursion++)
+                goto end_recursion;
+
+        /*
+         * Make recursion update visible before entering perf_tp_event
+         * so that we protect from perf recursions.
+         */
+        barrier();
+
+        raw_data = trace_buf->buf;
+
         /* Zero dead bytes from alignment to avoid buffer leak to userspace */
         *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
         entry = (struct kprobe_trace_entry *)raw_data;
@@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
         for (i = 0; i < tp->nr_args; i++)
                 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
         perf_tp_event(call->id, entry->ip, 1, entry, size);
+
+end_recursion:
+        trace_buf->recursion--;
 end:
         local_irq_restore(irq_flags);
+
         return 0;
 }
 
@@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
         struct ftrace_event_call *call = &tp->call;
         struct kretprobe_trace_entry *entry;
+        struct perf_trace_buf *trace_buf;
         struct trace_entry *ent;
         int size, __size, i, pc, __cpu;
         unsigned long irq_flags;
@@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
         __cpu = smp_processor_id();
 
         if (in_nmi())
-                raw_data = rcu_dereference(trace_profile_buf_nmi);
+                trace_buf = rcu_dereference(perf_trace_buf_nmi);
         else
-                raw_data = rcu_dereference(trace_profile_buf);
+                trace_buf = rcu_dereference(perf_trace_buf);
 
-        if (!raw_data)
+        if (!trace_buf)
                 goto end;
 
-        raw_data = per_cpu_ptr(raw_data, __cpu);
+        trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+        if (trace_buf->recursion++)
+                goto end_recursion;
+
+        /*
+         * Make recursion update visible before entering perf_tp_event
+         * so that we protect from perf recursions.
+         */
+        barrier();
+
+        raw_data = trace_buf->buf;
+
         /* Zero dead bytes from alignment to avoid buffer leak to userspace */
         *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
         entry = (struct kretprobe_trace_entry *)raw_data;
@@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
         for (i = 0; i < tp->nr_args; i++)
                 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
         perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+
+end_recursion:
+        trace_buf->recursion--;
 end:
         local_irq_restore(irq_flags);
+
         return 0;
 }
 
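
Both kprobe handlers keep the pre-existing trick visible in their context lines: the record size is rounded up to a u64 boundary, and the final word is zeroed before the entry is filled in, because the rounding can leave padding bytes that would otherwise carry stale buffer contents out to userspace. A small userspace illustration of why that works is below; ALIGN_UP and the demo payload are invented for the example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        uint64_t words[8];                      /* u64-aligned backing store */
        char *raw_data = (char *)words;
        size_t payload = 13;                    /* odd-sized record */
        size_t size = ALIGN_UP(payload, sizeof(uint64_t));  /* -> 16 */

        memset(raw_data, 0xAA, sizeof(words));  /* pretend: stale old data */

        /* Zero the last word first, as the handlers above do ... */
        *(uint64_t *)(&raw_data[size - sizeof(uint64_t)]) = 0ULL;
        /* ... then fill in the payload; bytes 13..15 stay zero instead of
         * leaking the 0xAA garbage into the 16-byte record. */
        memcpy(raw_data, "hello, tracer", payload);

        for (size_t i = 0; i < size; i++)
                printf("%02x ", (unsigned char)raw_data[i]);
        printf("\n");
        return 0;
}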
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 58b8e5370767..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
         struct syscall_metadata *sys_data;
+        struct perf_trace_buf *trace_buf;
         struct syscall_trace_enter *rec;
         unsigned long flags;
         char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
         cpu = smp_processor_id();
 
         if (in_nmi())
-                raw_data = rcu_dereference(trace_profile_buf_nmi);
+                trace_buf = rcu_dereference(perf_trace_buf_nmi);
         else
-                raw_data = rcu_dereference(trace_profile_buf);
+                trace_buf = rcu_dereference(perf_trace_buf);
 
-        if (!raw_data)
+        if (!trace_buf)
                 goto end;
 
-        raw_data = per_cpu_ptr(raw_data, cpu);
+        trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+        if (trace_buf->recursion++)
+                goto end_recursion;
+
+        /*
+         * Make recursion update visible before entering perf_tp_event
+         * so that we protect from perf recursions.
+         */
+        barrier();
+
+        raw_data = trace_buf->buf;
 
         /* zero the dead bytes from align to not leak stack to user */
         *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
                                (unsigned long *)&rec->args);
         perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
+end_recursion:
+        trace_buf->recursion--;
 end:
         local_irq_restore(flags);
 }
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
         struct syscall_metadata *sys_data;
         struct syscall_trace_exit *rec;
+        struct perf_trace_buf *trace_buf;
         unsigned long flags;
         int syscall_nr;
         char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
         cpu = smp_processor_id();
 
         if (in_nmi())
-                raw_data = rcu_dereference(trace_profile_buf_nmi);
+                trace_buf = rcu_dereference(perf_trace_buf_nmi);
         else
-                raw_data = rcu_dereference(trace_profile_buf);
+                trace_buf = rcu_dereference(perf_trace_buf);
 
-        if (!raw_data)
+        if (!trace_buf)
                 goto end;
 
-        raw_data = per_cpu_ptr(raw_data, cpu);
+        trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+        if (trace_buf->recursion++)
+                goto end_recursion;
+
+        /*
+         * Make recursion update visible before entering perf_tp_event
+         * so that we protect from perf recursions.
+         */
+        barrier();
+
+        raw_data = trace_buf->buf;
 
         /* zero the dead bytes from align to not leak stack to user */
         *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
         perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
+end_recursion:
+        trace_buf->recursion--;
 end:
         local_irq_restore(flags);
 }
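
The syscall handlers follow the same pattern as the kprobe ones. One detail shared by all four sites: NMI context keeps its own per-cpu buffer, so the recursion counter only has to catch same-context reentry (a perf callback re-triggering the event), not an NMI landing mid-write, which on a shared buffer would either corrupt the record or, with the guard, always be dropped. A toy sketch of that buffer selection, with in_nmi() faked by a flag and single static buffers in place of per-cpu allocations:

#include <stdio.h>

struct demo_buf { char buf[2048]; int recursion; };

static struct demo_buf regular_buf, nmi_buf;  /* both per-cpu in the kernel */
static int fake_in_nmi;                       /* stand-in for in_nmi() */

static struct demo_buf *pick_buf(void)
{
        /* An NMI interrupting a regular event writes somewhere else
         * entirely, so neither context can disturb the other's record. */
        return fake_in_nmi ? &nmi_buf : &regular_buf;
}

int main(void)
{
        struct demo_buf *b;

        b = pick_buf();
        printf("regular context -> %s buffer\n",
               b == &regular_buf ? "regular" : "nmi");

        fake_in_nmi = 1;  /* pretend an NMI fired */
        b = pick_buf();
        printf("nmi context     -> %s buffer\n",
               b == &regular_buf ? "regular" : "nmi");
        return 0;
}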