author		Frederic Weisbecker <fweisbec@gmail.com>	2009-09-18 00:10:28 -0400
committer	Frederic Weisbecker <fweisbec@gmail.com>	2009-09-18 01:25:44 -0400
commit		20ab4425a77a1f34028cc6ce57053c22c184ba5f (patch)
tree		ca821b19593c3821fa13a520201537ad35e4c98d /include
parent		e5e25cf47b0bdd1f7e9b8bb6368ee48e16de0c87 (diff)
tracing: Allocate the ftrace event profile buffer dynamically
Currently the trace event profile buffer is allocated on the stack. This
can be too much for the stack, as the events can have a large statically
defined field size and can also grow with dynamic arrays.
Allocate two per-cpu buffers shared by all profiled events. The first
buffer hosts every non-NMI context trace; it is protected by disabling
interrupts while writing and committing the trace. The second buffer is
reserved for NMI context, so that NMIs never race with the first buffer.
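
For illustration, the setup this implies might look roughly like the
sketch below, presumably run when the first event starts being profiled.
The helper name and the profile_buf_t type are assumptions made for the
example; only trace_profile_buf, trace_profile_buf_nmi and
FTRACE_MAX_PROFILE_SIZE come from the header change in this patch.

#include <linux/percpu.h>
#include <linux/rcupdate.h>

/* Hypothetical buffer type sized to the maximum profiled event. */
typedef unsigned long profile_buf_t[FTRACE_MAX_PROFILE_SIZE / sizeof(unsigned long)];

static int trace_profile_alloc_buffers(void)
{
	char *buf, *nmi_buf;

	buf = (char *)alloc_percpu(profile_buf_t);
	if (!buf)
		return -ENOMEM;
	/* Publish the non-nmi buffer so writers can rcu_dereference() it */
	rcu_assign_pointer(trace_profile_buf, buf);

	nmi_buf = (char *)alloc_percpu(profile_buf_t);
	if (!nmi_buf) {
		rcu_assign_pointer(trace_profile_buf, NULL);
		free_percpu(buf);
		return -ENOMEM;
	}
	rcu_assign_pointer(trace_profile_buf_nmi, nmi_buf);

	return 0;
}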
The whole write/commit section is RCU protected because we release
these buffers when the last profiled trace event is deactivated.
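
The release path this describes could be sketched as follows (again only
an illustration, the function name is made up): unpublish both pointers,
wait for any writer still inside the irq-disabled write/commit section,
then free the buffers.

static void trace_profile_free_buffers(void)
{
	char *buf = trace_profile_buf;
	char *nmi_buf = trace_profile_buf_nmi;

	rcu_assign_pointer(trace_profile_buf, NULL);
	rcu_assign_pointer(trace_profile_buf_nmi, NULL);

	/*
	 * Writers run the whole write/commit section with interrupts
	 * disabled, so once synchronize_sched() returns none of them
	 * can still see the old pointers.
	 */
	synchronize_sched();

	free_percpu(buf);
	free_percpu(nmi_buf);
}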
v2: Move the buffers out of trace_event and make them global, as pointed
out by Steven Rostedt.
v3: Fix the syscall events to handle the profiling buffer races
by disabling interrupts, now that the buffers are global.
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r--	include/linux/ftrace_event.h	|  6
-rw-r--r--	include/trace/ftrace.h		| 83
2 files changed, 61 insertions, 28 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bc103d7b1ca8..4ec5e67e18cf 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -4,6 +4,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 struct trace_array;
 struct tracer;
@@ -134,6 +135,11 @@ struct ftrace_event_call {
 	void			(*profile_disable)(void);
 };
 
+#define FTRACE_MAX_PROFILE_SIZE	2048
+
+extern char *trace_profile_buf;
+extern char *trace_profile_buf_nmi;
+
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a822087857e9..a0361cb69769 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -648,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *	struct ftrace_raw_##call *entry;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
+ *	struct trace_entry *ent;
  *	int __entry_size;
  *	int __data_size;
+ *	int __cpu
  *	int pc;
  *
- *	local_save_flags(irq_flags);
  *	pc = preempt_count();
  *
  *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
@@ -663,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *	     sizeof(u64));
  *	__entry_size -= sizeof(u32);
  *
- *	do {
- *		char raw_data[__entry_size]; <- allocate our sample in the stack
- *		struct trace_entry *ent;
+ *	// Protect the non nmi buffer
+ *	// This also protects the rcu read side
+ *	local_irq_save(irq_flags);
+ *	__cpu = smp_processor_id();
+ *
+ *	if (in_nmi())
+ *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *	else
+ *		raw_data = rcu_dereference(trace_profile_buf);
+ *
+ *	if (!raw_data)
+ *		goto end;
  *
- *		zero dead bytes from alignment to avoid stack leak to userspace:
+ *	raw_data = per_cpu_ptr(raw_data, __cpu);
  *
- *		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
- *		entry = (struct ftrace_raw_<call> *)raw_data;
- *		ent = &entry->ent;
- *		tracing_generic_entry_update(ent, irq_flags, pc);
- *		ent->type = event_call->id;
+ *	//zero dead bytes from alignment to avoid stack leak to userspace:
+ *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+ *	entry = (struct ftrace_raw_<call> *)raw_data;
+ *	ent = &entry->ent;
+ *	tracing_generic_entry_update(ent, irq_flags, pc);
+ *	ent->type = event_call->id;
  *
  *	<tstruct> <- do some jobs with dynamic arrays
  *
  *	<assign>  <- affect our values
  *
  *	perf_tpcounter_event(event_call->id, __addr, __count, entry,
  *		     __entry_size);  <- submit them to perf counter
- *	} while (0);
  *
  * }
  */
@@ -704,11 +714,13 @@ static void ftrace_profile_##call(proto)			\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
+	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *raw_data;							\
+	int __cpu;							\
 	int pc;								\
 									\
-	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@ -716,23 +728,38 @@ static void ftrace_profile_##call(proto)			\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	do {								\
-		char raw_data[__entry_size];				\
-		struct trace_entry *ent;				\
+	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
+		      "profile buffer not large enough"))		\
+		return;							\
+									\
+	local_irq_save(irq_flags);					\
+	__cpu = smp_processor_id();					\
 									\
-		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
-		entry = (struct ftrace_raw_##call *)raw_data;		\
-		ent = &entry->ent;					\
-		tracing_generic_entry_update(ent, irq_flags, pc);	\
-		ent->type = event_call->id;				\
+	if (in_nmi())							\
+		raw_data = rcu_dereference(trace_profile_buf_nmi);	\
+	else								\
+		raw_data = rcu_dereference(trace_profile_buf);		\
 									\
-	tstruct								\
+	if (!raw_data)							\
+		goto end;						\
 									\
-	{ assign; }							\
+	raw_data = per_cpu_ptr(raw_data, __cpu);			\
 									\
-	perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;	\
+	entry = (struct ftrace_raw_##call *)raw_data;			\
+	ent = &entry->ent;						\
+	tracing_generic_entry_update(ent, irq_flags, pc);		\
+	ent->type = event_call->id;					\
+									\
+	tstruct								\
+									\
+	{ assign; }							\
+									\
+	perf_tpcounter_event(event_call->id, __addr, __count, entry,	\
 			     __entry_size);				\
-	} while (0);							\
+									\
+end:									\
+	local_irq_restore(irq_flags);					\
 									\
 }
 