author		Borislav Petkov <borislav.petkov@amd.com>	2011-10-16 11:15:04 -0400
committer	Ingo Molnar <mingo@elte.hu>			2011-11-14 07:31:26 -0500
commit		9251f904f95175b4a1d8cbc0449e748f9edd7629
tree		585b73028c0c6b955d1759faf8544870eae213f5	/kernel/events
parent		efc96737bd82b508794d2b28061a12af4a3f7766
perf: Carve out callchain functionality
Split the callchain code from the perf events core into
a new kernel/events/callchain.c file.
This slims down the big core.c a bit.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
[keep ctx recursion handling inline and use internal headers]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1318778104-17152-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/Makefile		  2
-rw-r--r--	kernel/events/callchain.c	191
-rw-r--r--	kernel/events/core.c		209
-rw-r--r--	kernel/events/internal.h	 39
4 files changed, 230 insertions, 211 deletions
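
For orientation, below is a minimal, hypothetical sketch of how a caller inside kernel/events/ is expected to consume the interface that this split exports through internal.h: get_callchain_buffers() / put_callchain_buffers() reference-count the shared per-CPU buffers, and perf_callchain() captures a chain into one of them. The my_event_*() helpers and the pr_debug() consumer are illustrative assumptions only, not code from this commit.

/* Sketch only: hypothetical consumer of the carved-out callchain interface. */
#include <linux/perf_event.h>
#include "internal.h"

static int my_event_init(struct perf_event *event)
{
	/* One reference per callchain-sampling event; the per-CPU
	 * buffers are allocated lazily on the first reference. */
	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
		return get_callchain_buffers();
	return 0;
}

static void my_event_sample(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry;
	u64 i;

	/* NMI-safe; returns NULL on recursion or when no buffers exist. */
	entry = perf_callchain(regs);
	if (!entry)
		return;

	for (i = 0; i < entry->nr; i++)
		pr_debug("frame %llu: %llx\n",
			 (unsigned long long)i,
			 (unsigned long long)entry->ip[i]);
}

static void my_event_destroy(struct perf_event *event)
{
	/* Drop the reference; the last one frees the buffers via RCU. */
	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
		put_callchain_buffers();
}

After this patch, core.c itself reaches these helpers through the same internal.h declarations instead of the formerly file-local static functions.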
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 89e5e8aa4c36..22d901f9caf4 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_core.o = -pg
 endif
 
-obj-y := core.o ring_buffer.o
+obj-y := core.o ring_buffer.o callchain.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
new file mode 100644
index 000000000000..057e24b665cf
--- /dev/null
+++ b/kernel/events/callchain.c
@@ -0,0 +1,191 @@
+/*
+ * Performance events callchain code, extracted from core.c:
+ *
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct callchain_cpus_entries {
+	struct rcu_head rcu_head;
+	struct perf_callchain_entry *cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+static struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+				  struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+				struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+	struct callchain_cpus_entries *entries;
+	int cpu;
+
+	entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+	for_each_possible_cpu(cpu)
+		kfree(entries->cpu_entries[cpu]);
+
+	kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+	struct callchain_cpus_entries *entries;
+
+	entries = callchain_cpus_entries;
+	rcu_assign_pointer(callchain_cpus_entries, NULL);
+	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+	int cpu;
+	int size;
+	struct callchain_cpus_entries *entries;
+
+	/*
+	 * We can't use the percpu allocation API for data that can be
+	 * accessed from NMI. Use a temporary manual per cpu allocation
+	 * until that gets sorted out.
+	 */
+	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
+
+	entries = kzalloc(size, GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+
+	for_each_possible_cpu(cpu) {
+		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+							 cpu_to_node(cpu));
+		if (!entries->cpu_entries[cpu])
+			goto fail;
+	}
+
+	rcu_assign_pointer(callchain_cpus_entries, entries);
+
+	return 0;
+
+fail:
+	for_each_possible_cpu(cpu)
+		kfree(entries->cpu_entries[cpu]);
+	kfree(entries);
+
+	return -ENOMEM;
+}
+
+int get_callchain_buffers(void)
+{
+	int err = 0;
+	int count;
+
+	mutex_lock(&callchain_mutex);
+
+	count = atomic_inc_return(&nr_callchain_events);
+	if (WARN_ON_ONCE(count < 1)) {
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (count > 1) {
+		/* If the allocation failed, give up */
+		if (!callchain_cpus_entries)
+			err = -ENOMEM;
+		goto exit;
+	}
+
+	err = alloc_callchain_buffers();
+	if (err)
+		release_callchain_buffers();
+exit:
+	mutex_unlock(&callchain_mutex);
+
+	return err;
+}
+
+void put_callchain_buffers(void)
+{
+	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+		release_callchain_buffers();
+		mutex_unlock(&callchain_mutex);
+	}
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+	int cpu;
+	struct callchain_cpus_entries *entries;
+
+	*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+	if (*rctx == -1)
+		return NULL;
+
+	entries = rcu_dereference(callchain_cpus_entries);
+	if (!entries)
+		return NULL;
+
+	cpu = smp_processor_id();
+
+	return &entries->cpu_entries[cpu][*rctx];
+}
+
+static void
+put_callchain_entry(int rctx)
+{
+	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	int rctx;
+	struct perf_callchain_entry *entry;
+
+
+	entry = get_callchain_entry(&rctx);
+	if (rctx == -1)
+		return NULL;
+
+	if (!entry)
+		goto exit_put;
+
+	entry->nr = 0;
+
+	if (!user_mode(regs)) {
+		perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+		perf_callchain_kernel(entry, regs);
+		if (current->mm)
+			regs = task_pt_regs(current);
+		else
+			regs = NULL;
+	}
+
+	if (regs) {
+		perf_callchain_store(entry, PERF_CONTEXT_USER);
+		perf_callchain_user(entry, regs);
+	}
+
+exit_put:
+	put_callchain_entry(rctx);
+
+	return entry;
+}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0e8457da6f95..eadac69265fc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2570,215 +2570,6 @@ static u64 perf_event_read(struct perf_event *event)
 }
 
 /*
- * Callchain support
- */
-
-struct callchain_cpus_entries {
-	struct rcu_head rcu_head;
-	struct perf_callchain_entry *cpu_entries[0];
-};
-
-static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
-static atomic_t nr_callchain_events;
-static DEFINE_MUTEX(callchain_mutex);
-struct callchain_cpus_entries *callchain_cpus_entries;
-
-
-__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
-				  struct pt_regs *regs)
-{
-}
-
-__weak void perf_callchain_user(struct perf_callchain_entry *entry,
-				struct pt_regs *regs)
-{
-}
-
-static void release_callchain_buffers_rcu(struct rcu_head *head)
-{
-	struct callchain_cpus_entries *entries;
-	int cpu;
-
-	entries = container_of(head, struct callchain_cpus_entries, rcu_head);
-
-	for_each_possible_cpu(cpu)
-		kfree(entries->cpu_entries[cpu]);
-
-	kfree(entries);
-}
-
-static void release_callchain_buffers(void)
-{
-	struct callchain_cpus_entries *entries;
-
-	entries = callchain_cpus_entries;
-	rcu_assign_pointer(callchain_cpus_entries, NULL);
-	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
-}
-
-static int alloc_callchain_buffers(void)
-{
-	int cpu;
-	int size;
-	struct callchain_cpus_entries *entries;
-
-	/*
-	 * We can't use the percpu allocation API for data that can be
-	 * accessed from NMI. Use a temporary manual per cpu allocation
-	 * until that gets sorted out.
-	 */
-	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
-
-	entries = kzalloc(size, GFP_KERNEL);
-	if (!entries)
-		return -ENOMEM;
-
-	size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
-
-	for_each_possible_cpu(cpu) {
-		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
-							 cpu_to_node(cpu));
-		if (!entries->cpu_entries[cpu])
-			goto fail;
-	}
-
-	rcu_assign_pointer(callchain_cpus_entries, entries);
-
-	return 0;
-
-fail:
-	for_each_possible_cpu(cpu)
-		kfree(entries->cpu_entries[cpu]);
-	kfree(entries);
-
-	return -ENOMEM;
-}
-
-static int get_callchain_buffers(void)
-{
-	int err = 0;
-	int count;
-
-	mutex_lock(&callchain_mutex);
-
-	count = atomic_inc_return(&nr_callchain_events);
-	if (WARN_ON_ONCE(count < 1)) {
-		err = -EINVAL;
-		goto exit;
-	}
-
-	if (count > 1) {
-		/* If the allocation failed, give up */
-		if (!callchain_cpus_entries)
-			err = -ENOMEM;
-		goto exit;
-	}
-
-	err = alloc_callchain_buffers();
-	if (err)
-		release_callchain_buffers();
-exit:
-	mutex_unlock(&callchain_mutex);
-
-	return err;
-}
-
-static void put_callchain_buffers(void)
-{
-	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
-		release_callchain_buffers();
-		mutex_unlock(&callchain_mutex);
-	}
-}
-
-static int get_recursion_context(int *recursion)
-{
-	int rctx;
-
-	if (in_nmi())
-		rctx = 3;
-	else if (in_irq())
-		rctx = 2;
-	else if (in_softirq())
-		rctx = 1;
-	else
-		rctx = 0;
-
-	if (recursion[rctx])
-		return -1;
-
-	recursion[rctx]++;
-	barrier();
-
-	return rctx;
-}
-
-static inline void put_recursion_context(int *recursion, int rctx)
-{
-	barrier();
-	recursion[rctx]--;
-}
-
-static struct perf_callchain_entry *get_callchain_entry(int *rctx)
-{
-	int cpu;
-	struct callchain_cpus_entries *entries;
-
-	*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
-	if (*rctx == -1)
-		return NULL;
-
-	entries = rcu_dereference(callchain_cpus_entries);
-	if (!entries)
-		return NULL;
-
-	cpu = smp_processor_id();
-
-	return &entries->cpu_entries[cpu][*rctx];
-}
-
-static void
-put_callchain_entry(int rctx)
-{
-	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
-}
-
-static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	int rctx;
-	struct perf_callchain_entry *entry;
-
-
-	entry = get_callchain_entry(&rctx);
-	if (rctx == -1)
-		return NULL;
-
-	if (!entry)
-		goto exit_put;
-
-	entry->nr = 0;
-
-	if (!user_mode(regs)) {
-		perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
-		perf_callchain_kernel(entry, regs);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-
-	if (regs) {
-		perf_callchain_store(entry, PERF_CONTEXT_USER);
-		perf_callchain_user(entry, regs);
-	}
-
-exit_put:
-	put_callchain_entry(rctx);
-
-	return entry;
-}
-
-/*
  * Initialize the perf_event context in a task_struct:
  */
 static void __perf_event_init_context(struct perf_event_context *ctx)
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09097dd8116c..be4a43f6de4f 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -1,6 +1,10 @@
 #ifndef _KERNEL_EVENTS_INTERNAL_H
 #define _KERNEL_EVENTS_INTERNAL_H
 
+#include <linux/hardirq.h>
+
+/* Buffer handling */
+
 #define RING_BUFFER_WRITABLE		0x01
 
 struct ring_buffer {
@@ -64,7 +68,7 @@ static inline int page_order(struct ring_buffer *rb)
 }
 #endif
 
-static unsigned long perf_data_size(struct ring_buffer *rb)
+static inline unsigned long perf_data_size(struct ring_buffer *rb)
 {
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
 }
@@ -93,4 +97,37 @@ __output_copy(struct perf_output_handle *handle,
 	} while (len);
 }
 
+/* Callchain handling */
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+extern int get_callchain_buffers(void);
+extern void put_callchain_buffers(void);
+
+static inline int get_recursion_context(int *recursion)
+{
+	int rctx;
+
+	if (in_nmi())
+		rctx = 3;
+	else if (in_irq())
+		rctx = 2;
+	else if (in_softirq())
+		rctx = 1;
+	else
+		rctx = 0;
+
+	if (recursion[rctx])
+		return -1;
+
+	recursion[rctx]++;
+	barrier();
+
+	return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+	barrier();
+	recursion[rctx]--;
+}
+
 #endif /* _KERNEL_EVENTS_INTERNAL_H */