-rw-r--r--  include/linux/ftrace_event.h       |  6
-rw-r--r--  include/trace/ftrace.h              | 83
-rw-r--r--  kernel/trace/trace_event_profile.c  | 61
-rw-r--r--  kernel/trace/trace_syscalls.c       | 97
4 files changed, 199 insertions(+), 48 deletions(-)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bc103d7b1ca8..4ec5e67e18cf 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -4,6 +4,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 struct trace_array;
 struct tracer;
@@ -134,6 +135,11 @@ struct ftrace_event_call {
 	void		(*profile_disable)(void);
 };
 
+#define FTRACE_MAX_PROFILE_SIZE	2048
+
+extern char *trace_profile_buf;
+extern char *trace_profile_buf_nmi;
+
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
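FTRACE_MAX_PROFILE_SIZE caps a single profiled record, and every path added later in this patch sizes its record as ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32) before checking it against that cap. The extra u32 presumably accounts for the size header perf keeps in front of raw samples, so that header plus record stays u64-aligned. A small worked example of the arithmetic, with a hypothetical payload size (userspace C, only to show the numbers):

    /* Hypothetical illustration of the record-size rounding used in this patch */
    #include <stdio.h>

    #define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
    	unsigned long payload = 30;	/* raw event data, bytes (made-up number) */
    	unsigned long size;

    	size = ALIGN(payload + sizeof(unsigned int), sizeof(unsigned long long));
    	size -= sizeof(unsigned int);

    	/* prints 36: a 4-byte header plus 36 bytes of record is 40, u64-aligned */
    	printf("%lu\n", size);
    	return 0;
    }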
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a822087857e9..a0361cb69769 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -648,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *	struct ftrace_raw_##call *entry;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
+ *	struct trace_entry *ent;
  *	int __entry_size;
  *	int __data_size;
+ *	int __cpu;
  *	int pc;
  *
- *	local_save_flags(irq_flags);
  *	pc = preempt_count();
  *
  *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
@@ -663,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *		     sizeof(u64));
  *	__entry_size -= sizeof(u32);
  *
- *	do {
- *		char raw_data[__entry_size]; <- allocate our sample in the stack
- *		struct trace_entry *ent;
+ *	// Protect the non nmi buffer
+ *	// This also protects the rcu read side
+ *	local_irq_save(irq_flags);
+ *	__cpu = smp_processor_id();
+ *
+ *	if (in_nmi())
+ *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *	else
+ *		raw_data = rcu_dereference(trace_profile_buf);
+ *
+ *	if (!raw_data)
+ *		goto end;
  *
- *		zero dead bytes from alignment to avoid stack leak to userspace:
+ *	raw_data = per_cpu_ptr(raw_data, __cpu);
  *
- *		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
- *		entry = (struct ftrace_raw_<call> *)raw_data;
- *		ent = &entry->ent;
- *		tracing_generic_entry_update(ent, irq_flags, pc);
- *		ent->type = event_call->id;
+ *	// zero dead bytes from alignment to avoid stack leak to userspace:
+ *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+ *	entry = (struct ftrace_raw_<call> *)raw_data;
+ *	ent = &entry->ent;
+ *	tracing_generic_entry_update(ent, irq_flags, pc);
+ *	ent->type = event_call->id;
  *
- *		<tstruct> <- do some jobs with dynamic arrays
+ *	<tstruct> <- do some jobs with dynamic arrays
  *
- *		<assign>  <- affect our values
+ *	<assign>  <- affect our values
  *
- *		perf_tpcounter_event(event_call->id, __addr, __count, entry,
- *			     __entry_size);  <- submit them to perf counter
- *	} while (0);
+ *	perf_tpcounter_event(event_call->id, __addr, __count, entry,
+ *		     __entry_size);  <- submit them to perf counter
  *
  * }
  */
@@ -704,11 +714,13 @@ static void ftrace_profile_##call(proto) \
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
+	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *raw_data;							\
+	int __cpu;							\
 	int pc;								\
 									\
-	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@ -716,23 +728,38 @@ static void ftrace_profile_##call(proto) \
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	do {								\
-		char raw_data[__entry_size];				\
-		struct trace_entry *ent;				\
+	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
+		      "profile buffer not large enough"))		\
+		return;							\
+									\
+	local_irq_save(irq_flags);					\
+	__cpu = smp_processor_id();					\
 									\
-		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
-		entry = (struct ftrace_raw_##call *)raw_data;		\
-		ent = &entry->ent;					\
-		tracing_generic_entry_update(ent, irq_flags, pc);	\
-		ent->type = event_call->id;				\
+	if (in_nmi())							\
+		raw_data = rcu_dereference(trace_profile_buf_nmi);	\
+	else								\
+		raw_data = rcu_dereference(trace_profile_buf);		\
 									\
-		tstruct							\
+	if (!raw_data)							\
+		goto end;						\
 									\
-		{ assign; }						\
+	raw_data = per_cpu_ptr(raw_data, __cpu);			\
 									\
-		perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;	\
+	entry = (struct ftrace_raw_##call *)raw_data;			\
+	ent = &entry->ent;						\
+	tracing_generic_entry_update(ent, irq_flags, pc);		\
+	ent->type = event_call->id;					\
+									\
+	tstruct								\
+									\
+	{ assign; }							\
+									\
+	perf_tpcounter_event(event_call->id, __addr, __count, entry,	\
 			     __entry_size);				\
-	} while (0);							\
+									\
+end:									\
+	local_irq_restore(irq_flags);					\
 									\
 }
 
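Expanded for a single event, the generated ftrace_profile_<call> handler above boils down to roughly the following buffer handling (a minimal sketch, not the generated code; profile_sketch and its entry_size parameter are hypothetical stand-ins for the per-event pieces the macro fills in):

    /* Minimal sketch of the per-event profile fast path, not the generated code */
    #include <linux/ftrace_event.h>
    #include <linux/percpu.h>
    #include <linux/hardirq.h>

    static void profile_sketch(struct ftrace_event_call *event_call, int entry_size)
    {
    	struct trace_entry *ent;
    	unsigned long irq_flags;
    	char *raw_data;
    	int cpu, pc;

    	pc = preempt_count();

    	/* irqs off protects the per-cpu slot and acts as the rcu read side */
    	local_irq_save(irq_flags);
    	cpu = smp_processor_id();

    	/* NMIs use their own buffer so they cannot clobber an in-flight record */
    	if (in_nmi())
    		raw_data = rcu_dereference(trace_profile_buf_nmi);
    	else
    		raw_data = rcu_dereference(trace_profile_buf);

    	if (!raw_data)		/* profiling was concurrently disabled */
    		goto end;

    	raw_data = per_cpu_ptr(raw_data, cpu);

    	/* zero the padding u64 so stale buffer bytes are not exported */
    	*(u64 *)(&raw_data[entry_size - sizeof(u64)]) = 0ULL;

    	ent = (struct trace_entry *)raw_data;
    	tracing_generic_entry_update(ent, irq_flags, pc);
    	ent->type = event_call->id;

    	/* <tstruct> and <assign> would fill the event fields here */

    	perf_tpcounter_event(event_call->id, 0, 1, raw_data, entry_size);
    end:
    	local_irq_restore(irq_flags);
    }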
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index df4a74efd50c..3aaa77c3309b 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,12 +8,52 @@
 #include <linux/module.h>
 #include "trace.h"
 
+/*
+ * We can't use a size but a type in alloc_percpu()
+ * So let's create a dummy type that matches the desired size
+ */
+typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+
+char *trace_profile_buf;
+char *trace_profile_buf_nmi;
+
+/* Count the events in use (per event id, not per instance) */
+static int total_profile_count;
+
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
+	char *buf;
+	int ret = -ENOMEM;
+
 	if (atomic_inc_return(&event->profile_count))
 		return 0;
 
-	return event->profile_enable();
+	if (!total_profile_count++) {
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf;
+
+		rcu_assign_pointer(trace_profile_buf, buf);
+
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf_nmi;
+
+		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+	}
+
+	ret = event->profile_enable();
+	if (!ret)
+		return 0;
+
+	kfree(trace_profile_buf_nmi);
+fail_buf_nmi:
+	kfree(trace_profile_buf);
+fail_buf:
+	total_profile_count--;
+	atomic_dec(&event->profile_count);
+
+	return ret;
 }
 
 int ftrace_profile_enable(int event_id)
@@ -36,10 +76,29 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
+	char *buf, *nmi_buf;
+
 	if (!atomic_add_negative(-1, &event->profile_count))
 		return;
 
 	event->profile_disable();
+
+	if (!--total_profile_count) {
+		buf = trace_profile_buf;
+		rcu_assign_pointer(trace_profile_buf, NULL);
+
+		nmi_buf = trace_profile_buf_nmi;
+		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+
+		/*
+		 * Ensure every event still profiling has finished before
+		 * releasing the buffers
+		 */
+		synchronize_sched();
+
+		free_percpu(buf);
+		free_percpu(nmi_buf);
+	}
 }
 
 void ftrace_profile_disable(int event_id)
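Taken together, the enable and disable paths above give the buffers a refcounted allocate-on-first-use lifecycle. Condensed into one place, it is roughly this (a sketch of the code above with the refcounting and error handling stripped out; the two helper functions are illustrative only):

    /* Condensed sketch of the buffer lifecycle implemented above */

    /* alloc_percpu() takes a type, not a size, hence the dummy sized struct */
    typedef struct { char buf[FTRACE_MAX_PROFILE_SIZE]; } profile_buf_t;

    static void first_enable_sketch(void)
    {
    	char *buf = (char *)alloc_percpu(profile_buf_t);
    	char *nmi_buf = (char *)alloc_percpu(profile_buf_t);

    	/* publish; readers pick these up with rcu_dereference(), irqs off */
    	rcu_assign_pointer(trace_profile_buf, buf);
    	rcu_assign_pointer(trace_profile_buf_nmi, nmi_buf);
    }

    static void last_disable_sketch(void)
    {
    	char *buf = trace_profile_buf;
    	char *nmi_buf = trace_profile_buf_nmi;

    	/* unpublish first, so new events see NULL and bail out */
    	rcu_assign_pointer(trace_profile_buf, NULL);
    	rcu_assign_pointer(trace_profile_buf_nmi, NULL);

    	/*
    	 * Readers run with irqs disabled, which is an rcu-sched
    	 * read-side section, so synchronize_sched() waits for every
    	 * event still writing into the buffers.
    	 */
    	synchronize_sched();

    	free_percpu(buf);
    	free_percpu(nmi_buf);
    }

Allocation happens only when the first event is enabled and everything is torn down again when the last one goes away, so the buffers cost nothing while profiling is idle.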
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3c6a0e..7a3550cf2597 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit;
 
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-	struct syscall_trace_enter *rec;
 	struct syscall_metadata *sys_data;
+	struct syscall_trace_enter *rec;
+	unsigned long flags;
+	char *raw_data;
 	int syscall_nr;
 	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	do {
-		char raw_data[size];
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "profile buffer not large enough"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
 
-		/* zero the dead bytes from align to not leak stack to user */
-		*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
 
-		rec = (struct syscall_trace_enter *) raw_data;
-		tracing_generic_entry_update(&rec->ent, 0, 0);
-		rec->ent.type = sys_data->enter_id;
-		rec->nr = syscall_nr;
-		syscall_get_arguments(current, regs, 0, sys_data->nb_args,
-				       (unsigned long *)&rec->args);
-		perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-	} while(0);
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_enter *) raw_data;
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->enter_id;
+	rec->nr = syscall_nr;
+	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+			       (unsigned long *)&rec->args);
+	perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_enter(char *name)
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
 static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
-	struct syscall_trace_exit rec;
+	struct syscall_trace_exit *rec;
+	unsigned long flags;
 	int syscall_nr;
+	char *raw_data;
+	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	tracing_generic_entry_update(&rec.ent, 0, 0);
-	rec.ent.type = sys_data->exit_id;
-	rec.nr = syscall_nr;
-	rec.ret = syscall_get_return_value(current, regs);
+	/* We can probably do that at build time */
+	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
 
-	perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+	/*
+	 * Impossible, but be paranoid with the future
+	 * How to put this check outside runtime?
+	 */
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "exit event has grown above profile buffer size"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_exit *)raw_data;
+
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->exit_id;
+	rec->nr = syscall_nr;
+	rec->ret = syscall_get_return_value(current, regs);
+
+	perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_exit(char *name)
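All three submission paths zero the last u64 of the record, *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL, before filling it in. Because the size is rounded up, the tail of the record can be padding that no field ever writes, and without the zeroing it would carry stale buffer contents out to userspace. A self-contained illustration of the idiom, with a hypothetical record layout (userspace C):

    /* Why the last u64 of a rounded-up record is zeroed before it is filled */
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

    struct sample_rec {		/* hypothetical record, roughly a syscall exit */
    	uint64_t nr;
    	uint32_t ret;
    };				/* sizeof == 16 on common ABIs: 4 trailing pad bytes */

    int main(void)
    {
    	uint64_t backing[8];			/* u64-aligned stand-in for the per-cpu slot */
    	char *buf = (char *)backing;
    	size_t size;

    	memset(buf, 0xAA, sizeof(backing));	/* stale data from a previous record */

    	size = ALIGN(sizeof(struct sample_rec) + sizeof(uint32_t), sizeof(uint64_t));
    	size -= sizeof(uint32_t);		/* 20 bytes here */

    	/* zero the dead tail so padding is exported as zeroes, not stale data */
    	*(uint64_t *)(&buf[size - sizeof(uint64_t)]) = 0ULL;

    	struct sample_rec *rec = (struct sample_rec *)buf;
    	rec->nr = 1;
    	rec->ret = 0;

    	for (size_t i = 0; i < size; i++)	/* bytes 12..19 print as 00 */
    		printf("%02x ", (unsigned char)buf[i]);
    	printf("\n");
    	return 0;
    }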