diff options
76 files changed, 4431 insertions, 902 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 7f418bbc261a..eef3bbb97075 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG | |||
126 | config HAVE_DEFAULT_NO_SPIN_MUTEXES | 126 | config HAVE_DEFAULT_NO_SPIN_MUTEXES |
127 | bool | 127 | bool |
128 | 128 | ||
129 | config HAVE_HW_BREAKPOINT | ||
130 | bool | ||
131 | depends on HAVE_PERF_EVENTS | ||
132 | select ANON_INODES | ||
133 | select PERF_EVENTS | ||
134 | |||
135 | |||
129 | source "kernel/gcov/Kconfig" | 136 | source "kernel/gcov/Kconfig" |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72ace9515a07..178084b4377c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -49,6 +49,7 @@ config X86 | |||
49 | select HAVE_KERNEL_GZIP | 49 | select HAVE_KERNEL_GZIP |
50 | select HAVE_KERNEL_BZIP2 | 50 | select HAVE_KERNEL_BZIP2 |
51 | select HAVE_KERNEL_LZMA | 51 | select HAVE_KERNEL_LZMA |
52 | select HAVE_HW_BREAKPOINT | ||
52 | select HAVE_ARCH_KMEMCHECK | 53 | select HAVE_ARCH_KMEMCHECK |
53 | 54 | ||
54 | config OUTPUT_FORMAT | 55 | config OUTPUT_FORMAT |
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..9f828f87ca35 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h | |||
10 | header-y += sigcontext32.h | 10 | header-y += sigcontext32.h |
11 | header-y += ucontext.h | 11 | header-y += ucontext.h |
12 | header-y += processor-flags.h | 12 | header-y += processor-flags.h |
13 | header-y += hw_breakpoint.h | ||
13 | 14 | ||
14 | unifdef-y += e820.h | 15 | unifdef-y += e820.h |
15 | unifdef-y += ist.h | 16 | unifdef-y += ist.h |
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa84..7a15588e45d4 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <linux/user.h> | 18 | #include <linux/user.h> |
19 | #include <linux/elfcore.h> | 19 | #include <linux/elfcore.h> |
20 | #include <asm/debugreg.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * fill in the user structure for an a.out core dump | 23 | * fill in the user structure for an a.out core dump |
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) | |||
32 | >> PAGE_SHIFT; | 33 | >> PAGE_SHIFT; |
33 | dump->u_dsize -= dump->u_tsize; | 34 | dump->u_dsize -= dump->u_tsize; |
34 | dump->u_ssize = 0; | 35 | dump->u_ssize = 0; |
35 | dump->u_debugreg[0] = current->thread.debugreg0; | 36 | aout_dump_debugregs(dump); |
36 | dump->u_debugreg[1] = current->thread.debugreg1; | ||
37 | dump->u_debugreg[2] = current->thread.debugreg2; | ||
38 | dump->u_debugreg[3] = current->thread.debugreg3; | ||
39 | dump->u_debugreg[4] = 0; | ||
40 | dump->u_debugreg[5] = 0; | ||
41 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
42 | dump->u_debugreg[7] = current->thread.debugreg7; | ||
43 | 37 | ||
44 | if (dump->start_stack < TASK_SIZE) | 38 | if (dump->start_stack < TASK_SIZE) |
45 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) | 39 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e2..fdabd8435765 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #define DR_TRAP1 (0x2) /* db1 */ | 18 | #define DR_TRAP1 (0x2) /* db1 */ |
19 | #define DR_TRAP2 (0x4) /* db2 */ | 19 | #define DR_TRAP2 (0x4) /* db2 */ |
20 | #define DR_TRAP3 (0x8) /* db3 */ | 20 | #define DR_TRAP3 (0x8) /* db3 */ |
21 | #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) | ||
21 | 22 | ||
22 | #define DR_STEP (0x4000) /* single-step */ | 23 | #define DR_STEP (0x4000) /* single-step */ |
23 | #define DR_SWITCH (0x8000) /* task switch */ | 24 | #define DR_SWITCH (0x8000) /* task switch */ |
@@ -49,6 +50,8 @@ | |||
49 | 50 | ||
50 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ | 51 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ |
51 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ | 52 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ |
53 | #define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ | ||
54 | #define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ | ||
52 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ | 55 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ |
53 | 56 | ||
54 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ | 57 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ |
@@ -67,4 +70,34 @@ | |||
67 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ | 70 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ |
68 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ | 71 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ |
69 | 72 | ||
73 | /* | ||
74 | * HW breakpoint additions | ||
75 | */ | ||
76 | #ifdef __KERNEL__ | ||
77 | |||
78 | DECLARE_PER_CPU(unsigned long, dr7); | ||
79 | |||
80 | static inline void hw_breakpoint_disable(void) | ||
81 | { | ||
82 | /* Zero the control register for HW Breakpoint */ | ||
83 | set_debugreg(0UL, 7); | ||
84 | |||
85 | /* Zero-out the individual HW breakpoint address registers */ | ||
86 | set_debugreg(0UL, 0); | ||
87 | set_debugreg(0UL, 1); | ||
88 | set_debugreg(0UL, 2); | ||
89 | set_debugreg(0UL, 3); | ||
90 | } | ||
91 | |||
92 | static inline int hw_breakpoint_active(void) | ||
93 | { | ||
94 | return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; | ||
95 | } | ||
96 | |||
97 | extern void aout_dump_debugregs(struct user *dump); | ||
98 | |||
99 | extern void hw_breakpoint_restore(void); | ||
100 | |||
101 | #endif /* __KERNEL__ */ | ||
102 | |||
70 | #endif /* _ASM_X86_DEBUGREG_H */ | 103 | #endif /* _ASM_X86_DEBUGREG_H */ |
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 000000000000..0675a7c4c20e --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef _I386_HW_BREAKPOINT_H | ||
2 | #define _I386_HW_BREAKPOINT_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #define __ARCH_HW_BREAKPOINT_H | ||
6 | |||
7 | /* | ||
8 | * The name should probably be something dealt in | ||
9 | * a higher level. While dealing with the user | ||
10 | * (display/resolving) | ||
11 | */ | ||
12 | struct arch_hw_breakpoint { | ||
13 | char *name; /* Contains name of the symbol to set bkpt */ | ||
14 | unsigned long address; | ||
15 | u8 len; | ||
16 | u8 type; | ||
17 | }; | ||
18 | |||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/percpu.h> | ||
21 | #include <linux/list.h> | ||
22 | |||
23 | /* Available HW breakpoint length encodings */ | ||
24 | #define X86_BREAKPOINT_LEN_1 0x40 | ||
25 | #define X86_BREAKPOINT_LEN_2 0x44 | ||
26 | #define X86_BREAKPOINT_LEN_4 0x4c | ||
27 | #define X86_BREAKPOINT_LEN_EXECUTE 0x40 | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | #define X86_BREAKPOINT_LEN_8 0x48 | ||
31 | #endif | ||
32 | |||
33 | /* Available HW breakpoint type encodings */ | ||
34 | |||
35 | /* trigger on instruction execute */ | ||
36 | #define X86_BREAKPOINT_EXECUTE 0x80 | ||
37 | /* trigger on memory write */ | ||
38 | #define X86_BREAKPOINT_WRITE 0x81 | ||
39 | /* trigger on memory read or write */ | ||
40 | #define X86_BREAKPOINT_RW 0x83 | ||
41 | |||
42 | /* Total number of available HW breakpoint registers */ | ||
43 | #define HBP_NUM 4 | ||
44 | |||
45 | struct perf_event; | ||
46 | struct pmu; | ||
47 | |||
48 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | ||
49 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
50 | struct task_struct *tsk); | ||
51 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | ||
52 | unsigned long val, void *data); | ||
53 | |||
54 | |||
55 | int arch_install_hw_breakpoint(struct perf_event *bp); | ||
56 | void arch_uninstall_hw_breakpoint(struct perf_event *bp); | ||
57 | void hw_breakpoint_pmu_read(struct perf_event *bp); | ||
58 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); | ||
59 | |||
60 | extern void | ||
61 | arch_fill_perf_breakpoint(struct perf_event *bp); | ||
62 | |||
63 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); | ||
64 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); | ||
65 | |||
66 | extern int arch_bp_generic_fields(int x86_len, int x86_type, | ||
67 | int *gen_len, int *gen_type); | ||
68 | |||
69 | extern struct pmu perf_ops_bp; | ||
70 | |||
71 | #endif /* __KERNEL__ */ | ||
72 | #endif /* _I386_HW_BREAKPOINT_H */ | ||
73 | |||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c9786480f0fe..6f8ec1c37e0a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -30,6 +30,7 @@ struct mm_struct; | |||
30 | #include <linux/math64.h> | 30 | #include <linux/math64.h> |
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | 32 | ||
33 | #define HBP_NUM 4 | ||
33 | /* | 34 | /* |
34 | * Default implementation of macro that returns current | 35 | * Default implementation of macro that returns current |
35 | * instruction pointer ("program counter"). | 36 | * instruction pointer ("program counter"). |
@@ -422,6 +423,8 @@ extern unsigned int xstate_size; | |||
422 | extern void free_thread_xstate(struct task_struct *); | 423 | extern void free_thread_xstate(struct task_struct *); |
423 | extern struct kmem_cache *task_xstate_cachep; | 424 | extern struct kmem_cache *task_xstate_cachep; |
424 | 425 | ||
426 | struct perf_event; | ||
427 | |||
425 | struct thread_struct { | 428 | struct thread_struct { |
426 | /* Cached TLS descriptors: */ | 429 | /* Cached TLS descriptors: */ |
427 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | 430 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; |
@@ -443,13 +446,10 @@ struct thread_struct { | |||
443 | unsigned long fs; | 446 | unsigned long fs; |
444 | #endif | 447 | #endif |
445 | unsigned long gs; | 448 | unsigned long gs; |
446 | /* Hardware debugging registers: */ | 449 | /* Save middle states of ptrace breakpoints */ |
447 | unsigned long debugreg0; | 450 | struct perf_event *ptrace_bps[HBP_NUM]; |
448 | unsigned long debugreg1; | 451 | /* Debug status used for traps, single steps, etc... */ |
449 | unsigned long debugreg2; | 452 | unsigned long debugreg6; |
450 | unsigned long debugreg3; | ||
451 | unsigned long debugreg6; | ||
452 | unsigned long debugreg7; | ||
453 | /* Fault info: */ | 453 | /* Fault info: */ |
454 | unsigned long cr2; | 454 | unsigned long cr2; |
455 | unsigned long trap_no; | 455 | unsigned long trap_no; |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd678..4f2e66e29ecc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
41 | obj-y += bootflag.o e820.o | 41 | obj-y += bootflag.o e820.o |
42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
43 | obj-y += alternative.o i8253.o pci-nommu.o | 43 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
44 | obj-y += tsc.o io_delay.o rtc.o | 44 | obj-y += tsc.o io_delay.o rtc.o |
45 | 45 | ||
46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9b..1d2cb383410e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
6 | ifdef CONFIG_FUNCTION_TRACER | 6 | ifdef CONFIG_FUNCTION_TRACER |
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | CFLAGS_REMOVE_perf_event.o = -pg | ||
8 | endif | 9 | endif |
9 | 10 | ||
10 | # Make sure load_percpu_segment has no stackprotector | 11 | # Make sure load_percpu_segment has no stackprotector |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..4d267fb77828 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,549 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) 2009 IBM Corporation | ||
18 | * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Authors: Alan Stern <stern@rowland.harvard.edu> | ||
21 | * K.Prasad <prasad@linux.vnet.ibm.com> | ||
22 | * Frederic Weisbecker <fweisbec@gmail.com> | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
27 | * using the CPU's debug registers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/perf_event.h> | ||
31 | #include <linux/hw_breakpoint.h> | ||
32 | #include <linux/irqflags.h> | ||
33 | #include <linux/notifier.h> | ||
34 | #include <linux/kallsyms.h> | ||
35 | #include <linux/kprobes.h> | ||
36 | #include <linux/percpu.h> | ||
37 | #include <linux/kdebug.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/sched.h> | ||
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | ||
43 | |||
44 | #include <asm/hw_breakpoint.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/debugreg.h> | ||
47 | |||
48 | /* Per cpu debug control register value */ | ||
49 | DEFINE_PER_CPU(unsigned long, dr7); | ||
50 | EXPORT_PER_CPU_SYMBOL(dr7); | ||
51 | |||
52 | /* Per cpu debug address registers values */ | ||
53 | static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); | ||
54 | |||
55 | /* | ||
56 | * Stores the breakpoints currently in use on each breakpoint address | ||
57 | * register for each cpus | ||
58 | */ | ||
59 | static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); | ||
60 | |||
61 | |||
62 | /* | ||
63 | * Encode the length, type, Exact, and Enable bits for a particular breakpoint | ||
64 | * as stored in debug register 7. | ||
65 | */ | ||
66 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
67 | { | ||
68 | unsigned long bp_info; | ||
69 | |||
70 | bp_info = (len | type) & 0xf; | ||
71 | bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); | ||
72 | bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | | ||
73 | DR_GLOBAL_SLOWDOWN; | ||
74 | return bp_info; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Decode the length and type bits for a particular breakpoint as | ||
79 | * stored in debug register 7. Return the "enabled" status. | ||
80 | */ | ||
81 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) | ||
82 | { | ||
83 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); | ||
84 | |||
85 | *len = (bp_info & 0xc) | 0x40; | ||
86 | *type = (bp_info & 0x3) | 0x80; | ||
87 | |||
88 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * Install a perf counter breakpoint. | ||
93 | * | ||
94 | * We seek a free debug address register and use it for this | ||
95 | * breakpoint. Eventually we enable it in the debug control register. | ||
96 | * | ||
97 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
98 | * and registers local to this cpu. | ||
99 | */ | ||
100 | int arch_install_hw_breakpoint(struct perf_event *bp) | ||
101 | { | ||
102 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
103 | unsigned long *dr7; | ||
104 | int i; | ||
105 | |||
106 | for (i = 0; i < HBP_NUM; i++) { | ||
107 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
108 | |||
109 | if (!*slot) { | ||
110 | *slot = bp; | ||
111 | break; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
116 | return -EBUSY; | ||
117 | |||
118 | set_debugreg(info->address, i); | ||
119 | __get_cpu_var(cpu_debugreg[i]) = info->address; | ||
120 | |||
121 | dr7 = &__get_cpu_var(dr7); | ||
122 | *dr7 |= encode_dr7(i, info->len, info->type); | ||
123 | |||
124 | set_debugreg(*dr7, 7); | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Uninstall the breakpoint contained in the given counter. | ||
131 | * | ||
132 | * First we search the debug address register it uses and then we disable | ||
133 | * it. | ||
134 | * | ||
135 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
136 | * and registers local to this cpu. | ||
137 | */ | ||
138 | void arch_uninstall_hw_breakpoint(struct perf_event *bp) | ||
139 | { | ||
140 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
141 | unsigned long *dr7; | ||
142 | int i; | ||
143 | |||
144 | for (i = 0; i < HBP_NUM; i++) { | ||
145 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
146 | |||
147 | if (*slot == bp) { | ||
148 | *slot = NULL; | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | |||
153 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
154 | return; | ||
155 | |||
156 | dr7 = &__get_cpu_var(dr7); | ||
157 | *dr7 &= ~encode_dr7(i, info->len, info->type); | ||
158 | |||
159 | set_debugreg(*dr7, 7); | ||
160 | } | ||
161 | |||
162 | static int get_hbp_len(u8 hbp_len) | ||
163 | { | ||
164 | unsigned int len_in_bytes = 0; | ||
165 | |||
166 | switch (hbp_len) { | ||
167 | case X86_BREAKPOINT_LEN_1: | ||
168 | len_in_bytes = 1; | ||
169 | break; | ||
170 | case X86_BREAKPOINT_LEN_2: | ||
171 | len_in_bytes = 2; | ||
172 | break; | ||
173 | case X86_BREAKPOINT_LEN_4: | ||
174 | len_in_bytes = 4; | ||
175 | break; | ||
176 | #ifdef CONFIG_X86_64 | ||
177 | case X86_BREAKPOINT_LEN_8: | ||
178 | len_in_bytes = 8; | ||
179 | break; | ||
180 | #endif | ||
181 | } | ||
182 | return len_in_bytes; | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Check for virtual address in user space. | ||
187 | */ | ||
188 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
189 | { | ||
190 | unsigned int len; | ||
191 | |||
192 | len = get_hbp_len(hbp_len); | ||
193 | |||
194 | return (va <= TASK_SIZE - len); | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Check for virtual address in kernel space. | ||
199 | */ | ||
200 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | ||
201 | { | ||
202 | unsigned int len; | ||
203 | |||
204 | len = get_hbp_len(hbp_len); | ||
205 | |||
206 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * Store a breakpoint's encoded address, length, and type. | ||
211 | */ | ||
212 | static int arch_store_info(struct perf_event *bp) | ||
213 | { | ||
214 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
215 | /* | ||
216 | * For kernel-addresses, either the address or symbol name can be | ||
217 | * specified. | ||
218 | */ | ||
219 | if (info->name) | ||
220 | info->address = (unsigned long) | ||
221 | kallsyms_lookup_name(info->name); | ||
222 | if (info->address) | ||
223 | return 0; | ||
224 | |||
225 | return -EINVAL; | ||
226 | } | ||
227 | |||
228 | int arch_bp_generic_fields(int x86_len, int x86_type, | ||
229 | int *gen_len, int *gen_type) | ||
230 | { | ||
231 | /* Len */ | ||
232 | switch (x86_len) { | ||
233 | case X86_BREAKPOINT_LEN_1: | ||
234 | *gen_len = HW_BREAKPOINT_LEN_1; | ||
235 | break; | ||
236 | case X86_BREAKPOINT_LEN_2: | ||
237 | *gen_len = HW_BREAKPOINT_LEN_2; | ||
238 | break; | ||
239 | case X86_BREAKPOINT_LEN_4: | ||
240 | *gen_len = HW_BREAKPOINT_LEN_4; | ||
241 | break; | ||
242 | #ifdef CONFIG_X86_64 | ||
243 | case X86_BREAKPOINT_LEN_8: | ||
244 | *gen_len = HW_BREAKPOINT_LEN_8; | ||
245 | break; | ||
246 | #endif | ||
247 | default: | ||
248 | return -EINVAL; | ||
249 | } | ||
250 | |||
251 | /* Type */ | ||
252 | switch (x86_type) { | ||
253 | case X86_BREAKPOINT_EXECUTE: | ||
254 | *gen_type = HW_BREAKPOINT_X; | ||
255 | break; | ||
256 | case X86_BREAKPOINT_WRITE: | ||
257 | *gen_type = HW_BREAKPOINT_W; | ||
258 | break; | ||
259 | case X86_BREAKPOINT_RW: | ||
260 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
261 | break; | ||
262 | default: | ||
263 | return -EINVAL; | ||
264 | } | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | |||
270 | static int arch_build_bp_info(struct perf_event *bp) | ||
271 | { | ||
272 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
273 | |||
274 | info->address = bp->attr.bp_addr; | ||
275 | |||
276 | /* Len */ | ||
277 | switch (bp->attr.bp_len) { | ||
278 | case HW_BREAKPOINT_LEN_1: | ||
279 | info->len = X86_BREAKPOINT_LEN_1; | ||
280 | break; | ||
281 | case HW_BREAKPOINT_LEN_2: | ||
282 | info->len = X86_BREAKPOINT_LEN_2; | ||
283 | break; | ||
284 | case HW_BREAKPOINT_LEN_4: | ||
285 | info->len = X86_BREAKPOINT_LEN_4; | ||
286 | break; | ||
287 | #ifdef CONFIG_X86_64 | ||
288 | case HW_BREAKPOINT_LEN_8: | ||
289 | info->len = X86_BREAKPOINT_LEN_8; | ||
290 | break; | ||
291 | #endif | ||
292 | default: | ||
293 | return -EINVAL; | ||
294 | } | ||
295 | |||
296 | /* Type */ | ||
297 | switch (bp->attr.bp_type) { | ||
298 | case HW_BREAKPOINT_W: | ||
299 | info->type = X86_BREAKPOINT_WRITE; | ||
300 | break; | ||
301 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
302 | info->type = X86_BREAKPOINT_RW; | ||
303 | break; | ||
304 | case HW_BREAKPOINT_X: | ||
305 | info->type = X86_BREAKPOINT_EXECUTE; | ||
306 | break; | ||
307 | default: | ||
308 | return -EINVAL; | ||
309 | } | ||
310 | |||
311 | return 0; | ||
312 | } | ||
313 | /* | ||
314 | * Validate the arch-specific HW Breakpoint register settings | ||
315 | */ | ||
316 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
317 | struct task_struct *tsk) | ||
318 | { | ||
319 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
320 | unsigned int align; | ||
321 | int ret; | ||
322 | |||
323 | |||
324 | ret = arch_build_bp_info(bp); | ||
325 | if (ret) | ||
326 | return ret; | ||
327 | |||
328 | ret = -EINVAL; | ||
329 | |||
330 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
331 | /* | ||
332 | * Ptrace-refactoring code | ||
333 | * For now, we'll allow instruction breakpoint only for user-space | ||
334 | * addresses | ||
335 | */ | ||
336 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
337 | info->len != X86_BREAKPOINT_EXECUTE) | ||
338 | return ret; | ||
339 | |||
340 | switch (info->len) { | ||
341 | case X86_BREAKPOINT_LEN_1: | ||
342 | align = 0; | ||
343 | break; | ||
344 | case X86_BREAKPOINT_LEN_2: | ||
345 | align = 1; | ||
346 | break; | ||
347 | case X86_BREAKPOINT_LEN_4: | ||
348 | align = 3; | ||
349 | break; | ||
350 | #ifdef CONFIG_X86_64 | ||
351 | case X86_BREAKPOINT_LEN_8: | ||
352 | align = 7; | ||
353 | break; | ||
354 | #endif | ||
355 | default: | ||
356 | return ret; | ||
357 | } | ||
358 | |||
359 | if (bp->callback) | ||
360 | ret = arch_store_info(bp); | ||
361 | |||
362 | if (ret < 0) | ||
363 | return ret; | ||
364 | /* | ||
365 | * Check that the low-order bits of the address are appropriate | ||
366 | * for the alignment implied by len. | ||
367 | */ | ||
368 | if (info->address & align) | ||
369 | return -EINVAL; | ||
370 | |||
371 | /* Check that the virtual address is in the proper range */ | ||
372 | if (tsk) { | ||
373 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
374 | return -EFAULT; | ||
375 | } else { | ||
376 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
377 | return -EFAULT; | ||
378 | } | ||
379 | |||
380 | return 0; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * Dump the debug register contents to the user. | ||
385 | * We can't dump our per cpu values because it | ||
386 | * may contain cpu wide breakpoint, something that | ||
387 | * doesn't belong to the current task. | ||
388 | * | ||
389 | * TODO: include non-ptrace user breakpoints (perf) | ||
390 | */ | ||
391 | void aout_dump_debugregs(struct user *dump) | ||
392 | { | ||
393 | int i; | ||
394 | int dr7 = 0; | ||
395 | struct perf_event *bp; | ||
396 | struct arch_hw_breakpoint *info; | ||
397 | struct thread_struct *thread = ¤t->thread; | ||
398 | |||
399 | for (i = 0; i < HBP_NUM; i++) { | ||
400 | bp = thread->ptrace_bps[i]; | ||
401 | |||
402 | if (bp && !bp->attr.disabled) { | ||
403 | dump->u_debugreg[i] = bp->attr.bp_addr; | ||
404 | info = counter_arch_bp(bp); | ||
405 | dr7 |= encode_dr7(i, info->len, info->type); | ||
406 | } else { | ||
407 | dump->u_debugreg[i] = 0; | ||
408 | } | ||
409 | } | ||
410 | |||
411 | dump->u_debugreg[4] = 0; | ||
412 | dump->u_debugreg[5] = 0; | ||
413 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
414 | |||
415 | dump->u_debugreg[7] = dr7; | ||
416 | } | ||
417 | EXPORT_SYMBOL_GPL(aout_dump_debugregs); | ||
418 | |||
419 | /* | ||
420 | * Release the user breakpoints used by ptrace | ||
421 | */ | ||
422 | void flush_ptrace_hw_breakpoint(struct task_struct *tsk) | ||
423 | { | ||
424 | int i; | ||
425 | struct thread_struct *t = &tsk->thread; | ||
426 | |||
427 | for (i = 0; i < HBP_NUM; i++) { | ||
428 | unregister_hw_breakpoint(t->ptrace_bps[i]); | ||
429 | t->ptrace_bps[i] = NULL; | ||
430 | } | ||
431 | } | ||
432 | |||
433 | void hw_breakpoint_restore(void) | ||
434 | { | ||
435 | set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); | ||
436 | set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); | ||
437 | set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); | ||
438 | set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); | ||
439 | set_debugreg(current->thread.debugreg6, 6); | ||
440 | set_debugreg(__get_cpu_var(dr7), 7); | ||
441 | } | ||
442 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); | ||
443 | |||
444 | /* | ||
445 | * Handle debug exception notifications. | ||
446 | * | ||
447 | * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. | ||
448 | * | ||
449 | * NOTIFY_DONE returned if one of the following conditions is true. | ||
450 | * i) When the causative address is from user-space and the exception | ||
451 | * is a valid one, i.e. not triggered as a result of lazy debug register | ||
452 | * switching | ||
453 | * ii) When there are more bits than trap<n> set in DR6 register (such | ||
454 | * as BD, BS or BT) indicating that more than one debug condition is | ||
455 | * met and requires some more action in do_debug(). | ||
456 | * | ||
457 | * NOTIFY_STOP returned for all other cases | ||
458 | * | ||
459 | */ | ||
460 | static int __kprobes hw_breakpoint_handler(struct die_args *args) | ||
461 | { | ||
462 | int i, cpu, rc = NOTIFY_STOP; | ||
463 | struct perf_event *bp; | ||
464 | unsigned long dr7, dr6; | ||
465 | unsigned long *dr6_p; | ||
466 | |||
467 | /* The DR6 value is pointed by args->err */ | ||
468 | dr6_p = (unsigned long *)ERR_PTR(args->err); | ||
469 | dr6 = *dr6_p; | ||
470 | |||
471 | /* Do an early return if no trap bits are set in DR6 */ | ||
472 | if ((dr6 & DR_TRAP_BITS) == 0) | ||
473 | return NOTIFY_DONE; | ||
474 | |||
475 | get_debugreg(dr7, 7); | ||
476 | /* Disable breakpoints during exception handling */ | ||
477 | set_debugreg(0UL, 7); | ||
478 | /* | ||
479 | * Assert that local interrupts are disabled | ||
480 | * Reset the DRn bits in the virtualized register value. | ||
481 | * The ptrace trigger routine will add in whatever is needed. | ||
482 | */ | ||
483 | current->thread.debugreg6 &= ~DR_TRAP_BITS; | ||
484 | cpu = get_cpu(); | ||
485 | |||
486 | /* Handle all the breakpoints that were triggered */ | ||
487 | for (i = 0; i < HBP_NUM; ++i) { | ||
488 | if (likely(!(dr6 & (DR_TRAP0 << i)))) | ||
489 | continue; | ||
490 | |||
491 | /* | ||
492 | * The counter may be concurrently released but that can only | ||
493 | * occur from a call_rcu() path. We can then safely fetch | ||
494 | * the breakpoint, use its callback, touch its counter | ||
495 | * while we are in an rcu_read_lock() path. | ||
496 | */ | ||
497 | rcu_read_lock(); | ||
498 | |||
499 | bp = per_cpu(bp_per_reg[i], cpu); | ||
500 | if (bp) | ||
501 | rc = NOTIFY_DONE; | ||
502 | /* | ||
503 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | ||
504 | * exception handling | ||
505 | */ | ||
506 | (*dr6_p) &= ~(DR_TRAP0 << i); | ||
507 | /* | ||
508 | * bp can be NULL due to lazy debug register switching | ||
509 | * or due to concurrent perf counter removing. | ||
510 | */ | ||
511 | if (!bp) { | ||
512 | rcu_read_unlock(); | ||
513 | break; | ||
514 | } | ||
515 | |||
516 | (bp->callback)(bp, args->regs); | ||
517 | |||
518 | rcu_read_unlock(); | ||
519 | } | ||
520 | if (dr6 & (~DR_TRAP_BITS)) | ||
521 | rc = NOTIFY_DONE; | ||
522 | |||
523 | set_debugreg(dr7, 7); | ||
524 | put_cpu(); | ||
525 | |||
526 | return rc; | ||
527 | } | ||
528 | |||
529 | /* | ||
530 | * Handle debug exception notifications. | ||
531 | */ | ||
532 | int __kprobes hw_breakpoint_exceptions_notify( | ||
533 | struct notifier_block *unused, unsigned long val, void *data) | ||
534 | { | ||
535 | if (val != DIE_DEBUG) | ||
536 | return NOTIFY_DONE; | ||
537 | |||
538 | return hw_breakpoint_handler(data); | ||
539 | } | ||
540 | |||
541 | void hw_breakpoint_pmu_read(struct perf_event *bp) | ||
542 | { | ||
543 | /* TODO */ | ||
544 | } | ||
545 | |||
546 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) | ||
547 | { | ||
548 | /* TODO */ | ||
549 | } | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..34e86b67550c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | 45 | ||
46 | #include <asm/debugreg.h> | ||
46 | #include <asm/apicdef.h> | 47 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 48 | #include <asm/system.h> |
48 | 49 | ||
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
434 | "resuming...\n"); | 435 | "resuming...\n"); |
435 | kgdb_arch_handle_exception(args->trapnr, args->signr, | 436 | kgdb_arch_handle_exception(args->trapnr, args->signr, |
436 | args->err, "c", "", regs); | 437 | args->err, "c", "", regs); |
438 | /* | ||
439 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
440 | * denote completion of processing | ||
441 | */ | ||
442 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
437 | 443 | ||
438 | return NOTIFY_STOP; | 444 | return NOTIFY_STOP; |
439 | } | 445 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c5f1f117e0c0..3fe86d706a14 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
57 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | #include <asm/insn.h> | 58 | #include <asm/insn.h> |
59 | #include <asm/debugreg.h> | ||
59 | 60 | ||
60 | void jprobe_return_end(void); | 61 | void jprobe_return_end(void); |
61 | 62 | ||
@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
945 | ret = NOTIFY_STOP; | 946 | ret = NOTIFY_STOP; |
946 | break; | 947 | break; |
947 | case DIE_DEBUG: | 948 | case DIE_DEBUG: |
948 | if (post_kprobe_handler(args->regs)) | 949 | if (post_kprobe_handler(args->regs)) { |
950 | /* | ||
951 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
952 | * denote completion of processing | ||
953 | */ | ||
954 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
949 | ret = NOTIFY_STOP; | 955 | ret = NOTIFY_STOP; |
956 | } | ||
950 | break; | 957 | break; |
951 | case DIE_GPF: | 958 | case DIE_GPF: |
952 | /* | 959 | /* |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..c843f8406da2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | static void set_idt(void *newidt, __u16 limit) | 30 | static void set_idt(void *newidt, __u16 limit) |
30 | { | 31 | { |
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) | |||
202 | 203 | ||
203 | /* Interrupts aren't acceptable while we reboot */ | 204 | /* Interrupts aren't acceptable while we reboot */ |
204 | local_irq_disable(); | 205 | local_irq_disable(); |
206 | hw_breakpoint_disable(); | ||
205 | 207 | ||
206 | if (image->preserve_context) { | 208 | if (image->preserve_context) { |
207 | #ifdef CONFIG_X86_IO_APIC | 209 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..4a8bb82248ae 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | 23 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
23 | unsigned long addr) | 24 | unsigned long addr) |
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) | |||
282 | 283 | ||
283 | /* Interrupts aren't acceptable while we reboot */ | 284 | /* Interrupts aren't acceptable while we reboot */ |
284 | local_irq_disable(); | 285 | local_irq_disable(); |
286 | hw_breakpoint_disable(); | ||
285 | 287 | ||
286 | if (image->preserve_context) { | 288 | if (image->preserve_context) { |
287 | #ifdef CONFIG_X86_IO_APIC | 289 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..744508e7cfdd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <trace/events/power.h> | 12 | #include <trace/events/power.h> |
13 | #include <linux/hw_breakpoint.h> | ||
13 | #include <asm/system.h> | 14 | #include <asm/system.h> |
14 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | 16 | #include <asm/syscalls.h> |
@@ -17,6 +18,7 @@ | |||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 19 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | 20 | #include <asm/ds.h> |
21 | #include <asm/debugreg.h> | ||
20 | 22 | ||
21 | unsigned long idle_halt; | 23 | unsigned long idle_halt; |
22 | EXPORT_SYMBOL(idle_halt); | 24 | EXPORT_SYMBOL(idle_halt); |
@@ -103,14 +105,7 @@ void flush_thread(void) | |||
103 | } | 105 | } |
104 | #endif | 106 | #endif |
105 | 107 | ||
106 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 108 | flush_ptrace_hw_breakpoint(tsk); |
107 | |||
108 | tsk->thread.debugreg0 = 0; | ||
109 | tsk->thread.debugreg1 = 0; | ||
110 | tsk->thread.debugreg2 = 0; | ||
111 | tsk->thread.debugreg3 = 0; | ||
112 | tsk->thread.debugreg6 = 0; | ||
113 | tsk->thread.debugreg7 = 0; | ||
114 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 109 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
115 | /* | 110 | /* |
116 | * Forget coprocessor state.. | 111 | * Forget coprocessor state.. |
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
192 | else if (next->debugctlmsr != prev->debugctlmsr) | 187 | else if (next->debugctlmsr != prev->debugctlmsr) |
193 | update_debugctlmsr(next->debugctlmsr); | 188 | update_debugctlmsr(next->debugctlmsr); |
194 | 189 | ||
195 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
196 | set_debugreg(next->debugreg0, 0); | ||
197 | set_debugreg(next->debugreg1, 1); | ||
198 | set_debugreg(next->debugreg2, 2); | ||
199 | set_debugreg(next->debugreg3, 3); | ||
200 | /* no 4 and 5 */ | ||
201 | set_debugreg(next->debugreg6, 6); | ||
202 | set_debugreg(next->debugreg7, 7); | ||
203 | } | ||
204 | |||
205 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 190 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
206 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 191 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
207 | /* prev and next are different */ | 192 | /* prev and next are different */ |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..d5bd3132ee70 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/idle.h> | 58 | #include <asm/idle.h> |
59 | #include <asm/syscalls.h> | 59 | #include <asm/syscalls.h> |
60 | #include <asm/ds.h> | 60 | #include <asm/ds.h> |
61 | #include <asm/debugreg.h> | ||
61 | 62 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 64 | ||
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
259 | 260 | ||
260 | task_user_gs(p) = get_user_gs(regs); | 261 | task_user_gs(p) = get_user_gs(regs); |
261 | 262 | ||
263 | p->thread.io_bitmap_ptr = NULL; | ||
262 | tsk = current; | 264 | tsk = current; |
265 | err = -ENOMEM; | ||
266 | |||
267 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
268 | |||
263 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 269 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
264 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | 270 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
265 | IO_BITMAP_BYTES, GFP_KERNEL); | 271 | IO_BITMAP_BYTES, GFP_KERNEL); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa490..70cf15873f3d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/idle.h> | 52 | #include <asm/idle.h> |
53 | #include <asm/syscalls.h> | 53 | #include <asm/syscalls.h> |
54 | #include <asm/ds.h> | 54 | #include <asm/ds.h> |
55 | #include <asm/debugreg.h> | ||
55 | 56 | ||
56 | asmlinkage extern void ret_from_fork(void); | 57 | asmlinkage extern void ret_from_fork(void); |
57 | 58 | ||
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
297 | 298 | ||
298 | p->thread.fs = me->thread.fs; | 299 | p->thread.fs = me->thread.fs; |
299 | p->thread.gs = me->thread.gs; | 300 | p->thread.gs = me->thread.gs; |
301 | p->thread.io_bitmap_ptr = NULL; | ||
300 | 302 | ||
301 | savesegment(gs, p->thread.gsindex); | 303 | savesegment(gs, p->thread.gsindex); |
302 | savesegment(fs, p->thread.fsindex); | 304 | savesegment(fs, p->thread.fsindex); |
303 | savesegment(es, p->thread.es); | 305 | savesegment(es, p->thread.es); |
304 | savesegment(ds, p->thread.ds); | 306 | savesegment(ds, p->thread.ds); |
305 | 307 | ||
308 | err = -ENOMEM; | ||
309 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
310 | |||
306 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 311 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
307 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 312 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
308 | if (!p->thread.io_bitmap_ptr) { | 313 | if (!p->thread.io_bitmap_ptr) { |
@@ -341,6 +346,7 @@ out: | |||
341 | kfree(p->thread.io_bitmap_ptr); | 346 | kfree(p->thread.io_bitmap_ptr); |
342 | p->thread.io_bitmap_max = 0; | 347 | p->thread.io_bitmap_max = 0; |
343 | } | 348 | } |
349 | |||
344 | return err; | 350 | return err; |
345 | } | 351 | } |
346 | 352 | ||
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
495 | */ | 501 | */ |
496 | if (preload_fpu) | 502 | if (preload_fpu) |
497 | __math_state_restore(); | 503 | __math_state_restore(); |
504 | |||
498 | return prev_p; | 505 | return prev_p; |
499 | } | 506 | } |
500 | 507 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4f76d275ee4..b25f8947ed7a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | #include <linux/perf_event.h> | ||
26 | #include <linux/hw_breakpoint.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -34,6 +36,7 @@ | |||
34 | #include <asm/prctl.h> | 36 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 38 | #include <asm/ds.h> |
39 | #include <asm/hw_breakpoint.h> | ||
37 | 40 | ||
38 | #include "tls.h" | 41 | #include "tls.h" |
39 | 42 | ||
@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, | |||
249 | return 0; | 252 | return 0; |
250 | } | 253 | } |
251 | 254 | ||
252 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
253 | { | ||
254 | return TASK_SIZE - 3; | ||
255 | } | ||
256 | |||
257 | #else /* CONFIG_X86_64 */ | 255 | #else /* CONFIG_X86_64 */ |
258 | 256 | ||
259 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 257 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, | |||
378 | return 0; | 376 | return 0; |
379 | } | 377 | } |
380 | 378 | ||
381 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
382 | { | ||
383 | #ifdef CONFIG_IA32_EMULATION | ||
384 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
385 | return IA32_PAGE_OFFSET - 3; | ||
386 | #endif | ||
387 | return TASK_SIZE_MAX - 7; | ||
388 | } | ||
389 | |||
390 | #endif /* CONFIG_X86_32 */ | 379 | #endif /* CONFIG_X86_32 */ |
391 | 380 | ||
392 | static unsigned long get_flags(struct task_struct *task) | 381 | static unsigned long get_flags(struct task_struct *task) |
@@ -566,99 +555,229 @@ static int genregs_set(struct task_struct *target, | |||
566 | return ret; | 555 | return ret; |
567 | } | 556 | } |
568 | 557 | ||
558 | static void ptrace_triggered(struct perf_event *bp, void *data) | ||
559 | { | ||
560 | int i; | ||
561 | struct thread_struct *thread = &(current->thread); | ||
562 | |||
563 | /* | ||
564 | * Store in the virtual DR6 register the fact that the breakpoint | ||
565 | * was hit so the thread's debugger will see it. | ||
566 | */ | ||
567 | for (i = 0; i < HBP_NUM; i++) { | ||
568 | if (thread->ptrace_bps[i] == bp) | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | thread->debugreg6 |= (DR_TRAP0 << i); | ||
573 | } | ||
574 | |||
569 | /* | 575 | /* |
570 | * This function is trivial and will be inlined by the compiler. | 576 | * Walk through every ptrace breakpoints for this thread and |
571 | * Having it separates the implementation details of debug | 577 | * build the dr7 value on top of their attributes. |
572 | * registers from the interface details of ptrace. | 578 | * |
573 | */ | 579 | */ |
574 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 580 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) |
575 | { | 581 | { |
576 | switch (n) { | 582 | int i; |
577 | case 0: return child->thread.debugreg0; | 583 | int dr7 = 0; |
578 | case 1: return child->thread.debugreg1; | 584 | struct arch_hw_breakpoint *info; |
579 | case 2: return child->thread.debugreg2; | 585 | |
580 | case 3: return child->thread.debugreg3; | 586 | for (i = 0; i < HBP_NUM; i++) { |
581 | case 6: return child->thread.debugreg6; | 587 | if (bp[i] && !bp[i]->attr.disabled) { |
582 | case 7: return child->thread.debugreg7; | 588 | info = counter_arch_bp(bp[i]); |
589 | dr7 |= encode_dr7(i, info->len, info->type); | ||
590 | } | ||
583 | } | 591 | } |
584 | return 0; | 592 | |
593 | return dr7; | ||
585 | } | 594 | } |
586 | 595 | ||
587 | static int ptrace_set_debugreg(struct task_struct *child, | 596 | /* |
588 | int n, unsigned long data) | 597 | * Handle ptrace writes to debug register 7. |
598 | */ | ||
599 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | ||
589 | { | 600 | { |
590 | int i; | 601 | struct thread_struct *thread = &(tsk->thread); |
602 | unsigned long old_dr7; | ||
603 | int i, orig_ret = 0, rc = 0; | ||
604 | int enabled, second_pass = 0; | ||
605 | unsigned len, type; | ||
606 | int gen_len, gen_type; | ||
607 | struct perf_event *bp; | ||
608 | |||
609 | data &= ~DR_CONTROL_RESERVED; | ||
610 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
611 | restore: | ||
612 | /* | ||
613 | * Loop through all the hardware breakpoints, making the | ||
614 | * appropriate changes to each. | ||
615 | */ | ||
616 | for (i = 0; i < HBP_NUM; i++) { | ||
617 | enabled = decode_dr7(data, i, &len, &type); | ||
618 | bp = thread->ptrace_bps[i]; | ||
619 | |||
620 | if (!enabled) { | ||
621 | if (bp) { | ||
622 | /* | ||
623 | * Don't unregister the breakpoints right-away, | ||
624 | * unless all register_user_hw_breakpoint() | ||
625 | * requests have succeeded. This prevents | ||
626 | * any window of opportunity for debug | ||
627 | * register grabbing by other users. | ||
628 | */ | ||
629 | if (!second_pass) | ||
630 | continue; | ||
631 | thread->ptrace_bps[i] = NULL; | ||
632 | unregister_hw_breakpoint(bp); | ||
633 | } | ||
634 | continue; | ||
635 | } | ||
591 | 636 | ||
592 | if (unlikely(n == 4 || n == 5)) | 637 | /* |
593 | return -EIO; | 638 | * We shoud have at least an inactive breakpoint at this |
639 | * slot. It means the user is writing dr7 without having | ||
640 | * written the address register first | ||
641 | */ | ||
642 | if (!bp) { | ||
643 | rc = -EINVAL; | ||
644 | break; | ||
645 | } | ||
594 | 646 | ||
595 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 647 | rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
596 | return -EIO; | 648 | if (rc) |
649 | break; | ||
597 | 650 | ||
598 | switch (n) { | 651 | /* |
599 | case 0: child->thread.debugreg0 = data; break; | 652 | * This is a temporary thing as bp is unregistered/registered |
600 | case 1: child->thread.debugreg1 = data; break; | 653 | * to simulate modification |
601 | case 2: child->thread.debugreg2 = data; break; | 654 | */ |
602 | case 3: child->thread.debugreg3 = data; break; | 655 | bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, |
656 | gen_type, bp->callback, | ||
657 | tsk, true); | ||
658 | thread->ptrace_bps[i] = NULL; | ||
603 | 659 | ||
604 | case 6: | 660 | if (!bp) { /* incorrect bp, or we have a bug in bp API */ |
605 | if ((data & ~0xffffffffUL) != 0) | 661 | rc = -EINVAL; |
606 | return -EIO; | 662 | break; |
607 | child->thread.debugreg6 = data; | 663 | } |
608 | break; | 664 | if (IS_ERR(bp)) { |
665 | rc = PTR_ERR(bp); | ||
666 | bp = NULL; | ||
667 | break; | ||
668 | } | ||
669 | thread->ptrace_bps[i] = bp; | ||
670 | } | ||
671 | /* | ||
672 | * Make a second pass to free the remaining unused breakpoints | ||
673 | * or to restore the original breakpoints if an error occurred. | ||
674 | */ | ||
675 | if (!second_pass) { | ||
676 | second_pass = 1; | ||
677 | if (rc < 0) { | ||
678 | orig_ret = rc; | ||
679 | data = old_dr7; | ||
680 | } | ||
681 | goto restore; | ||
682 | } | ||
683 | return ((orig_ret < 0) ? orig_ret : rc); | ||
684 | } | ||
609 | 685 | ||
610 | case 7: | 686 | /* |
687 | * Handle PTRACE_PEEKUSR calls for the debug register area. | ||
688 | */ | ||
689 | static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | ||
690 | { | ||
691 | struct thread_struct *thread = &(tsk->thread); | ||
692 | unsigned long val = 0; | ||
693 | |||
694 | if (n < HBP_NUM) { | ||
695 | struct perf_event *bp; | ||
696 | bp = thread->ptrace_bps[n]; | ||
697 | if (!bp) | ||
698 | return 0; | ||
699 | val = bp->hw.info.address; | ||
700 | } else if (n == 6) { | ||
701 | val = thread->debugreg6; | ||
702 | } else if (n == 7) { | ||
703 | val = ptrace_get_dr7(thread->ptrace_bps); | ||
704 | } | ||
705 | return val; | ||
706 | } | ||
707 | |||
708 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
709 | unsigned long addr) | ||
710 | { | ||
711 | struct perf_event *bp; | ||
712 | struct thread_struct *t = &tsk->thread; | ||
713 | |||
714 | if (!t->ptrace_bps[nr]) { | ||
611 | /* | 715 | /* |
612 | * Sanity-check data. Take one half-byte at once with | 716 | * Put stub len and type to register (reserve) an inactive but |
613 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 717 | * correct bp |
614 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
615 | * 2 and 3 are LENi. Given a list of invalid values, | ||
616 | * we do mask |= 1 << invalid_value, so that | ||
617 | * (mask >> check) & 1 is a correct test for invalid | ||
618 | * values. | ||
619 | * | ||
620 | * R/Wi contains the type of the breakpoint / | ||
621 | * watchpoint, LENi contains the length of the watched | ||
622 | * data in the watchpoint case. | ||
623 | * | ||
624 | * The invalid values are: | ||
625 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
626 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
627 | * mask |= 0x4444. | ||
628 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
629 | * 0x1110. | ||
630 | * | ||
631 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
632 | * | ||
633 | * See the Intel Manual "System Programming Guide", | ||
634 | * 15.2.4 | ||
635 | * | ||
636 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
637 | * mode (i.e. even for 32-bit userspace software, but | ||
638 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
639 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
640 | */ | 718 | */ |
641 | #ifdef CONFIG_X86_32 | 719 | bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, |
642 | #define DR7_MASK 0x5f54 | 720 | HW_BREAKPOINT_W, |
643 | #else | 721 | ptrace_triggered, tsk, |
644 | #define DR7_MASK 0x5554 | 722 | false); |
645 | #endif | 723 | } else { |
646 | data &= ~DR_CONTROL_RESERVED; | 724 | bp = t->ptrace_bps[nr]; |
647 | for (i = 0; i < 4; i++) | 725 | t->ptrace_bps[nr] = NULL; |
648 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 726 | bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, |
649 | return -EIO; | 727 | bp->attr.bp_type, |
650 | child->thread.debugreg7 = data; | 728 | bp->callback, |
651 | if (data) | 729 | tsk, |
652 | set_tsk_thread_flag(child, TIF_DEBUG); | 730 | bp->attr.disabled); |
653 | else | ||
654 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
655 | break; | ||
656 | } | 731 | } |
657 | 732 | ||
733 | if (!bp) | ||
734 | return -EIO; | ||
735 | /* | ||
736 | * CHECKME: the previous code returned -EIO if the addr wasn't a | ||
737 | * valid task virtual addr. The new one will return -EINVAL in this | ||
738 | * case. | ||
739 | * -EINVAL may be what we want for in-kernel breakpoints users, but | ||
740 | * -EIO looks better for ptrace, since we refuse a register writing | ||
741 | * for the user. And anyway this is the previous behaviour. | ||
742 | */ | ||
743 | if (IS_ERR(bp)) | ||
744 | return PTR_ERR(bp); | ||
745 | |||
746 | t->ptrace_bps[nr] = bp; | ||
747 | |||
658 | return 0; | 748 | return 0; |
659 | } | 749 | } |
660 | 750 | ||
661 | /* | 751 | /* |
752 | * Handle PTRACE_POKEUSR calls for the debug register area. | ||
753 | */ | ||
754 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | ||
755 | { | ||
756 | struct thread_struct *thread = &(tsk->thread); | ||
757 | int rc = 0; | ||
758 | |||
759 | /* There are no DR4 or DR5 registers */ | ||
760 | if (n == 4 || n == 5) | ||
761 | return -EIO; | ||
762 | |||
763 | if (n == 6) { | ||
764 | thread->debugreg6 = val; | ||
765 | goto ret_path; | ||
766 | } | ||
767 | if (n < HBP_NUM) { | ||
768 | rc = ptrace_set_breakpoint_addr(tsk, n, val); | ||
769 | if (rc) | ||
770 | return rc; | ||
771 | } | ||
772 | /* All that's left is DR7 */ | ||
773 | if (n == 7) | ||
774 | rc = ptrace_write_dr7(tsk, val); | ||
775 | |||
776 | ret_path: | ||
777 | return rc; | ||
778 | } | ||
779 | |||
780 | /* | ||
662 | * These access the current or another (stopped) task's io permission | 781 | * These access the current or another (stopped) task's io permission |
663 | * bitmap for debugging or core dump. | 782 | * bitmap for debugging or core dump. |
664 | */ | 783 | */ |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..fbf3b07c8567 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) | |||
799 | 799 | ||
800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
801 | if (signr > 0) { | 801 | if (signr > 0) { |
802 | /* | ||
803 | * Re-enable any watchpoints before delivering the | ||
804 | * signal to user space. The processor register will | ||
805 | * have been cleared if the watchpoint triggered | ||
806 | * inside the kernel. | ||
807 | */ | ||
808 | if (current->thread.debugreg7) | ||
809 | set_debugreg(current->thread.debugreg7, 7); | ||
810 | |||
811 | /* Whee! Actually deliver the signal. */ | 802 | /* Whee! Actually deliver the signal. */ |
812 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 803 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
813 | /* | 804 | /* |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc3..33399176512a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | 529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
530 | { | 530 | { |
531 | struct task_struct *tsk = current; | 531 | struct task_struct *tsk = current; |
532 | unsigned long condition; | 532 | unsigned long dr6; |
533 | int si_code; | 533 | int si_code; |
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(dr6, 6); |
536 | 536 | ||
537 | /* Catch kmemcheck conditions first of all! */ | 537 | /* Catch kmemcheck conditions first of all! */ |
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | 538 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
539 | return; | 539 | return; |
540 | 540 | ||
541 | /* DR6 may or may not be cleared by the CPU */ | ||
542 | set_debugreg(0, 6); | ||
541 | /* | 543 | /* |
542 | * The processor cleared BTF, so don't mark that we need it set. | 544 | * The processor cleared BTF, so don't mark that we need it set. |
543 | */ | 545 | */ |
544 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 546 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
545 | tsk->thread.debugctlmsr = 0; | 547 | tsk->thread.debugctlmsr = 0; |
546 | 548 | ||
547 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 549 | /* Store the virtualized DR6 value */ |
548 | SIGTRAP) == NOTIFY_STOP) | 550 | tsk->thread.debugreg6 = dr6; |
551 | |||
552 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | ||
553 | SIGTRAP) == NOTIFY_STOP) | ||
549 | return; | 554 | return; |
550 | 555 | ||
551 | /* It's safe to allow irq's after DR6 has been saved */ | 556 | /* It's safe to allow irq's after DR6 has been saved */ |
552 | preempt_conditional_sti(regs); | 557 | preempt_conditional_sti(regs); |
553 | 558 | ||
554 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 559 | if (regs->flags & X86_VM_MASK) { |
555 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 560 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
556 | if (!tsk->thread.debugreg7) | 561 | error_code, 1); |
557 | goto clear_dr7; | 562 | return; |
558 | } | 563 | } |
559 | 564 | ||
560 | #ifdef CONFIG_X86_32 | ||
561 | if (regs->flags & X86_VM_MASK) | ||
562 | goto debug_vm86; | ||
563 | #endif | ||
564 | |||
565 | /* Save debug status register where ptrace can see it */ | ||
566 | tsk->thread.debugreg6 = condition; | ||
567 | |||
568 | /* | 565 | /* |
569 | * Single-stepping through TF: make sure we ignore any events in | 566 | * Single-stepping through system calls: ignore any exceptions in |
570 | * kernel space (but re-enable TF when returning to user mode). | 567 | * kernel space, but re-enable TF when returning to user mode. |
568 | * | ||
569 | * We already checked v86 mode above, so we can check for kernel mode | ||
570 | * by just checking the CPL of CS. | ||
571 | */ | 571 | */ |
572 | if (condition & DR_STEP) { | 572 | if ((dr6 & DR_STEP) && !user_mode(regs)) { |
573 | if (!user_mode(regs)) | 573 | tsk->thread.debugreg6 &= ~DR_STEP; |
574 | goto clear_TF_reenable; | 574 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
575 | regs->flags &= ~X86_EFLAGS_TF; | ||
575 | } | 576 | } |
576 | 577 | si_code = get_si_code(tsk->thread.debugreg6); | |
577 | si_code = get_si_code(condition); | 578 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) |
578 | /* Ok, finally something we can handle */ | 579 | send_sigtrap(tsk, regs, error_code, si_code); |
579 | send_sigtrap(tsk, regs, error_code, si_code); | ||
580 | |||
581 | /* | ||
582 | * Disable additional traps. They'll be re-enabled when | ||
583 | * the signal is delivered. | ||
584 | */ | ||
585 | clear_dr7: | ||
586 | set_debugreg(0, 7); | ||
587 | preempt_conditional_cli(regs); | 580 | preempt_conditional_cli(regs); |
588 | return; | ||
589 | 581 | ||
590 | #ifdef CONFIG_X86_32 | ||
591 | debug_vm86: | ||
592 | /* reenable preemption: handle_vm86_trap() might sleep */ | ||
593 | dec_preempt_count(); | ||
594 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
595 | conditional_cli(regs); | ||
596 | return; | ||
597 | #endif | ||
598 | |||
599 | clear_TF_reenable: | ||
600 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
601 | regs->flags &= ~X86_EFLAGS_TF; | ||
602 | preempt_conditional_cli(regs); | ||
603 | return; | 582 | return; |
604 | } | 583 | } |
605 | 584 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527c..4fc80174191c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #define CREATE_TRACE_POINTS | 42 | #define CREATE_TRACE_POINTS |
43 | #include "trace.h" | 43 | #include "trace.h" |
44 | 44 | ||
45 | #include <asm/debugreg.h> | ||
45 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
46 | #include <asm/msr.h> | 47 | #include <asm/msr.h> |
47 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3643 | trace_kvm_entry(vcpu->vcpu_id); | 3644 | trace_kvm_entry(vcpu->vcpu_id); |
3644 | kvm_x86_ops->run(vcpu, kvm_run); | 3645 | kvm_x86_ops->run(vcpu, kvm_run); |
3645 | 3646 | ||
3646 | if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { | 3647 | /* |
3647 | set_debugreg(current->thread.debugreg0, 0); | 3648 | * If the guest has used debug registers, at least dr7 |
3648 | set_debugreg(current->thread.debugreg1, 1); | 3649 | * will be disabled while returning to the host. |
3649 | set_debugreg(current->thread.debugreg2, 2); | 3650 | * If we don't have active breakpoints in the host, we don't |
3650 | set_debugreg(current->thread.debugreg3, 3); | 3651 | * care about the messed up debug address registers. But if |
3651 | set_debugreg(current->thread.debugreg6, 6); | 3652 | * we have some of them active, restore the old state. |
3652 | set_debugreg(current->thread.debugreg7, 7); | 3653 | */ |
3653 | } | 3654 | if (hw_breakpoint_active()) |
3655 | hw_breakpoint_restore(); | ||
3654 | 3656 | ||
3655 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 3657 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
3656 | local_irq_enable(); | 3658 | local_irq_enable(); |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..11a4ad4d6253 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | |||
540 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
541 | 541 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | 543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) { |
544 | /* | ||
545 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
546 | * denote completion of processing | ||
547 | */ | ||
548 | (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; | ||
544 | return NOTIFY_STOP; | 549 | return NOTIFY_STOP; |
550 | } | ||
545 | 551 | ||
546 | return NOTIFY_DONE; | 552 | return NOTIFY_DONE; |
547 | } | 553 | } |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667e..0a979f3e5b8a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mce.h> | 18 | #include <asm/mce.h> |
19 | #include <asm/xcr.h> | 19 | #include <asm/xcr.h> |
20 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_X86_32 | 23 | #ifdef CONFIG_X86_32 |
23 | static struct saved_context saved_context; | 24 | static struct saved_context saved_context; |
@@ -142,31 +143,6 @@ static void fix_processor_context(void) | |||
142 | #endif | 143 | #endif |
143 | load_TR_desc(); /* This does ltr */ | 144 | load_TR_desc(); /* This does ltr */ |
144 | load_LDT(¤t->active_mm->context); /* This does lldt */ | 145 | load_LDT(¤t->active_mm->context); /* This does lldt */ |
145 | |||
146 | /* | ||
147 | * Now maybe reload the debug registers | ||
148 | */ | ||
149 | if (current->thread.debugreg7) { | ||
150 | #ifdef CONFIG_X86_32 | ||
151 | set_debugreg(current->thread.debugreg0, 0); | ||
152 | set_debugreg(current->thread.debugreg1, 1); | ||
153 | set_debugreg(current->thread.debugreg2, 2); | ||
154 | set_debugreg(current->thread.debugreg3, 3); | ||
155 | /* no 4 and 5 */ | ||
156 | set_debugreg(current->thread.debugreg6, 6); | ||
157 | set_debugreg(current->thread.debugreg7, 7); | ||
158 | #else | ||
159 | /* CONFIG_X86_64 */ | ||
160 | loaddebug(¤t->thread, 0); | ||
161 | loaddebug(¤t->thread, 1); | ||
162 | loaddebug(¤t->thread, 2); | ||
163 | loaddebug(¤t->thread, 3); | ||
164 | /* no 4 and 5 */ | ||
165 | loaddebug(¤t->thread, 6); | ||
166 | loaddebug(¤t->thread, 7); | ||
167 | #endif | ||
168 | } | ||
169 | |||
170 | } | 146 | } |
171 | 147 | ||
172 | /** | 148 | /** |
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index af75e07217ba..d8214dc03fa7 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c | |||
@@ -114,6 +114,7 @@ int main(int argc, char **argv) | |||
114 | unsigned char insn_buf[16]; | 114 | unsigned char insn_buf[16]; |
115 | struct insn insn; | 115 | struct insn insn; |
116 | int insns = 0, c; | 116 | int insns = 0, c; |
117 | int warnings = 0; | ||
117 | 118 | ||
118 | parse_args(argc, argv); | 119 | parse_args(argc, argv); |
119 | 120 | ||
@@ -151,18 +152,22 @@ int main(int argc, char **argv) | |||
151 | insn_init(&insn, insn_buf, x86_64); | 152 | insn_init(&insn, insn_buf, x86_64); |
152 | insn_get_length(&insn); | 153 | insn_get_length(&insn); |
153 | if (insn.length != nb) { | 154 | if (insn.length != nb) { |
154 | fprintf(stderr, "Error: %s found a difference at %s\n", | 155 | warnings++; |
156 | fprintf(stderr, "Warning: %s found difference at %s\n", | ||
155 | prog, sym); | 157 | prog, sym); |
156 | fprintf(stderr, "Error: %s", line); | 158 | fprintf(stderr, "Warning: %s", line); |
157 | fprintf(stderr, "Error: objdump says %d bytes, but " | 159 | fprintf(stderr, "Warning: objdump says %d bytes, but " |
158 | "insn_get_length() says %d\n", nb, | 160 | "insn_get_length() says %d\n", nb, |
159 | insn.length); | 161 | insn.length); |
160 | if (verbose) | 162 | if (verbose) |
161 | dump_insn(stderr, &insn); | 163 | dump_insn(stderr, &insn); |
162 | exit(2); | ||
163 | } | 164 | } |
164 | } | 165 | } |
165 | fprintf(stderr, "Succeed: decoded and checked %d instructions\n", | 166 | if (warnings) |
166 | insns); | 167 | fprintf(stderr, "Warning: decoded and checked %d" |
168 | " instructions with %d warnings\n", insns, warnings); | ||
169 | else | ||
170 | fprintf(stderr, "Succeed: decoded and checked %d" | ||
171 | " instructions\n", insns); | ||
167 | return 0; | 172 | return 0; |
168 | } | 173 | } |
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43360c1d8f70..47bbdf9c38d0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -137,13 +137,8 @@ struct ftrace_event_call { | |||
137 | 137 | ||
138 | #define FTRACE_MAX_PROFILE_SIZE 2048 | 138 | #define FTRACE_MAX_PROFILE_SIZE 2048 |
139 | 139 | ||
140 | struct perf_trace_buf { | 140 | extern char *perf_trace_buf; |
141 | char buf[FTRACE_MAX_PROFILE_SIZE]; | 141 | extern char *perf_trace_buf_nmi; |
142 | int recursion; | ||
143 | }; | ||
144 | |||
145 | extern struct perf_trace_buf *perf_trace_buf; | ||
146 | extern struct perf_trace_buf *perf_trace_buf_nmi; | ||
147 | 142 | ||
148 | #define MAX_FILTER_PRED 32 | 143 | #define MAX_FILTER_PRED 32 |
149 | #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ | 144 | #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ |
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 000000000000..c9f7f7c7b0e0 --- /dev/null +++ b/include/linux/hw_breakpoint.h | |||
@@ -0,0 +1,140 @@ | |||
1 | #ifndef _LINUX_HW_BREAKPOINT_H | ||
2 | #define _LINUX_HW_BREAKPOINT_H | ||
3 | |||
4 | enum { | ||
5 | HW_BREAKPOINT_LEN_1 = 1, | ||
6 | HW_BREAKPOINT_LEN_2 = 2, | ||
7 | HW_BREAKPOINT_LEN_4 = 4, | ||
8 | HW_BREAKPOINT_LEN_8 = 8, | ||
9 | }; | ||
10 | |||
11 | enum { | ||
12 | HW_BREAKPOINT_R = 1, | ||
13 | HW_BREAKPOINT_W = 2, | ||
14 | HW_BREAKPOINT_X = 4, | ||
15 | }; | ||
16 | |||
17 | #ifdef __KERNEL__ | ||
18 | |||
19 | #include <linux/perf_event.h> | ||
20 | |||
21 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
22 | |||
23 | static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) | ||
24 | { | ||
25 | return bp->attr.bp_addr; | ||
26 | } | ||
27 | |||
28 | static inline int hw_breakpoint_type(struct perf_event *bp) | ||
29 | { | ||
30 | return bp->attr.bp_type; | ||
31 | } | ||
32 | |||
33 | static inline int hw_breakpoint_len(struct perf_event *bp) | ||
34 | { | ||
35 | return bp->attr.bp_len; | ||
36 | } | ||
37 | |||
38 | extern struct perf_event * | ||
39 | register_user_hw_breakpoint(unsigned long addr, | ||
40 | int len, | ||
41 | int type, | ||
42 | perf_callback_t triggered, | ||
43 | struct task_struct *tsk, | ||
44 | bool active); | ||
45 | |||
46 | /* FIXME: only change from the attr, and don't unregister */ | ||
47 | extern struct perf_event * | ||
48 | modify_user_hw_breakpoint(struct perf_event *bp, | ||
49 | unsigned long addr, | ||
50 | int len, | ||
51 | int type, | ||
52 | perf_callback_t triggered, | ||
53 | struct task_struct *tsk, | ||
54 | bool active); | ||
55 | |||
56 | /* | ||
57 | * Kernel breakpoints are not associated with any particular thread. | ||
58 | */ | ||
59 | extern struct perf_event * | ||
60 | register_wide_hw_breakpoint_cpu(unsigned long addr, | ||
61 | int len, | ||
62 | int type, | ||
63 | perf_callback_t triggered, | ||
64 | int cpu, | ||
65 | bool active); | ||
66 | |||
67 | extern struct perf_event ** | ||
68 | register_wide_hw_breakpoint(unsigned long addr, | ||
69 | int len, | ||
70 | int type, | ||
71 | perf_callback_t triggered, | ||
72 | bool active); | ||
73 | |||
74 | extern int register_perf_hw_breakpoint(struct perf_event *bp); | ||
75 | extern int __register_perf_hw_breakpoint(struct perf_event *bp); | ||
76 | extern void unregister_hw_breakpoint(struct perf_event *bp); | ||
77 | extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); | ||
78 | |||
79 | extern int reserve_bp_slot(struct perf_event *bp); | ||
80 | extern void release_bp_slot(struct perf_event *bp); | ||
81 | |||
82 | extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); | ||
83 | |||
84 | static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) | ||
85 | { | ||
86 | return &bp->hw.info; | ||
87 | } | ||
88 | |||
89 | #else /* !CONFIG_HAVE_HW_BREAKPOINT */ | ||
90 | |||
91 | static inline struct perf_event * | ||
92 | register_user_hw_breakpoint(unsigned long addr, | ||
93 | int len, | ||
94 | int type, | ||
95 | perf_callback_t triggered, | ||
96 | struct task_struct *tsk, | ||
97 | bool active) { return NULL; } | ||
98 | static inline struct perf_event * | ||
99 | modify_user_hw_breakpoint(struct perf_event *bp, | ||
100 | unsigned long addr, | ||
101 | int len, | ||
102 | int type, | ||
103 | perf_callback_t triggered, | ||
104 | struct task_struct *tsk, | ||
105 | bool active) { return NULL; } | ||
106 | static inline struct perf_event * | ||
107 | register_wide_hw_breakpoint_cpu(unsigned long addr, | ||
108 | int len, | ||
109 | int type, | ||
110 | perf_callback_t triggered, | ||
111 | int cpu, | ||
112 | bool active) { return NULL; } | ||
113 | static inline struct perf_event ** | ||
114 | register_wide_hw_breakpoint(unsigned long addr, | ||
115 | int len, | ||
116 | int type, | ||
117 | perf_callback_t triggered, | ||
118 | bool active) { return NULL; } | ||
119 | static inline int | ||
120 | register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } | ||
121 | static inline int | ||
122 | __register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } | ||
123 | static inline void unregister_hw_breakpoint(struct perf_event *bp) { } | ||
124 | static inline void | ||
125 | unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } | ||
126 | static inline int | ||
127 | reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } | ||
128 | static inline void release_bp_slot(struct perf_event *bp) { } | ||
129 | |||
130 | static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } | ||
131 | |||
132 | static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) | ||
133 | { | ||
134 | return NULL; | ||
135 | } | ||
136 | |||
137 | #endif /* CONFIG_HAVE_HW_BREAKPOINT */ | ||
138 | #endif /* __KERNEL__ */ | ||
139 | |||
140 | #endif /* _LINUX_HW_BREAKPOINT_H */ | ||
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7f87563c8485..43adbd7f0010 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -18,6 +18,10 @@ | |||
18 | #include <linux/ioctl.h> | 18 | #include <linux/ioctl.h> |
19 | #include <asm/byteorder.h> | 19 | #include <asm/byteorder.h> |
20 | 20 | ||
21 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
22 | #include <asm/hw_breakpoint.h> | ||
23 | #endif | ||
24 | |||
21 | /* | 25 | /* |
22 | * User-space ABI bits: | 26 | * User-space ABI bits: |
23 | */ | 27 | */ |
@@ -31,6 +35,7 @@ enum perf_type_id { | |||
31 | PERF_TYPE_TRACEPOINT = 2, | 35 | PERF_TYPE_TRACEPOINT = 2, |
32 | PERF_TYPE_HW_CACHE = 3, | 36 | PERF_TYPE_HW_CACHE = 3, |
33 | PERF_TYPE_RAW = 4, | 37 | PERF_TYPE_RAW = 4, |
38 | PERF_TYPE_BREAKPOINT = 5, | ||
34 | 39 | ||
35 | PERF_TYPE_MAX, /* non-ABI */ | 40 | PERF_TYPE_MAX, /* non-ABI */ |
36 | }; | 41 | }; |
@@ -209,6 +214,15 @@ struct perf_event_attr { | |||
209 | __u32 wakeup_events; /* wakeup every n events */ | 214 | __u32 wakeup_events; /* wakeup every n events */ |
210 | __u32 wakeup_watermark; /* bytes before wakeup */ | 215 | __u32 wakeup_watermark; /* bytes before wakeup */ |
211 | }; | 216 | }; |
217 | |||
218 | union { | ||
219 | struct { /* Hardware breakpoint info */ | ||
220 | __u64 bp_addr; | ||
221 | __u32 bp_type; | ||
222 | __u32 bp_len; | ||
223 | }; | ||
224 | }; | ||
225 | |||
212 | __u32 __reserved_2; | 226 | __u32 __reserved_2; |
213 | 227 | ||
214 | __u64 __reserved_3; | 228 | __u64 __reserved_3; |
@@ -478,6 +492,11 @@ struct hw_perf_event { | |||
478 | s64 remaining; | 492 | s64 remaining; |
479 | struct hrtimer hrtimer; | 493 | struct hrtimer hrtimer; |
480 | }; | 494 | }; |
495 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
496 | union { /* breakpoint */ | ||
497 | struct arch_hw_breakpoint info; | ||
498 | }; | ||
499 | #endif | ||
481 | }; | 500 | }; |
482 | atomic64_t prev_count; | 501 | atomic64_t prev_count; |
483 | u64 sample_period; | 502 | u64 sample_period; |
@@ -546,6 +565,10 @@ struct perf_pending_entry { | |||
546 | void (*func)(struct perf_pending_entry *); | 565 | void (*func)(struct perf_pending_entry *); |
547 | }; | 566 | }; |
548 | 567 | ||
568 | typedef void (*perf_callback_t)(struct perf_event *, void *); | ||
569 | |||
570 | struct perf_sample_data; | ||
571 | |||
549 | /** | 572 | /** |
550 | * struct perf_event - performance event kernel representation: | 573 | * struct perf_event - performance event kernel representation: |
551 | */ | 574 | */ |
@@ -588,7 +611,7 @@ struct perf_event { | |||
588 | u64 tstamp_running; | 611 | u64 tstamp_running; |
589 | u64 tstamp_stopped; | 612 | u64 tstamp_stopped; |
590 | 613 | ||
591 | struct perf_event_attr attr; | 614 | struct perf_event_attr attr; |
592 | struct hw_perf_event hw; | 615 | struct hw_perf_event hw; |
593 | 616 | ||
594 | struct perf_event_context *ctx; | 617 | struct perf_event_context *ctx; |
@@ -637,10 +660,18 @@ struct perf_event { | |||
637 | struct pid_namespace *ns; | 660 | struct pid_namespace *ns; |
638 | u64 id; | 661 | u64 id; |
639 | 662 | ||
663 | void (*overflow_handler)(struct perf_event *event, | ||
664 | int nmi, struct perf_sample_data *data, | ||
665 | struct pt_regs *regs); | ||
666 | |||
640 | #ifdef CONFIG_EVENT_PROFILE | 667 | #ifdef CONFIG_EVENT_PROFILE |
641 | struct event_filter *filter; | 668 | struct event_filter *filter; |
642 | #endif | 669 | #endif |
643 | 670 | ||
671 | perf_callback_t callback; | ||
672 | |||
673 | perf_callback_t event_callback; | ||
674 | |||
644 | #endif /* CONFIG_PERF_EVENTS */ | 675 | #endif /* CONFIG_PERF_EVENTS */ |
645 | }; | 676 | }; |
646 | 677 | ||
@@ -745,6 +776,14 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, | |||
745 | struct perf_cpu_context *cpuctx, | 776 | struct perf_cpu_context *cpuctx, |
746 | struct perf_event_context *ctx, int cpu); | 777 | struct perf_event_context *ctx, int cpu); |
747 | extern void perf_event_update_userpage(struct perf_event *event); | 778 | extern void perf_event_update_userpage(struct perf_event *event); |
779 | extern int perf_event_release_kernel(struct perf_event *event); | ||
780 | extern struct perf_event * | ||
781 | perf_event_create_kernel_counter(struct perf_event_attr *attr, | ||
782 | int cpu, | ||
783 | pid_t pid, | ||
784 | perf_callback_t callback); | ||
785 | extern u64 perf_event_read_value(struct perf_event *event, | ||
786 | u64 *enabled, u64 *running); | ||
748 | 787 | ||
749 | struct perf_sample_data { | 788 | struct perf_sample_data { |
750 | u64 type; | 789 | u64 type; |
@@ -821,6 +860,7 @@ extern int sysctl_perf_event_sample_rate; | |||
821 | extern void perf_event_init(void); | 860 | extern void perf_event_init(void); |
822 | extern void perf_tp_event(int event_id, u64 addr, u64 count, | 861 | extern void perf_tp_event(int event_id, u64 addr, u64 count, |
823 | void *record, int entry_size); | 862 | void *record, int entry_size); |
863 | extern void perf_bp_event(struct perf_event *event, void *data); | ||
824 | 864 | ||
825 | #ifndef perf_misc_flags | 865 | #ifndef perf_misc_flags |
826 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ | 866 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ |
@@ -834,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, | |||
834 | extern void perf_output_end(struct perf_output_handle *handle); | 874 | extern void perf_output_end(struct perf_output_handle *handle); |
835 | extern void perf_output_copy(struct perf_output_handle *handle, | 875 | extern void perf_output_copy(struct perf_output_handle *handle, |
836 | const void *buf, unsigned int len); | 876 | const void *buf, unsigned int len); |
877 | extern int perf_swevent_get_recursion_context(void); | ||
878 | extern void perf_swevent_put_recursion_context(int rctx); | ||
837 | #else | 879 | #else |
838 | static inline void | 880 | static inline void |
839 | perf_event_task_sched_in(struct task_struct *task, int cpu) { } | 881 | perf_event_task_sched_in(struct task_struct *task, int cpu) { } |
@@ -855,11 +897,15 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } | |||
855 | static inline void | 897 | static inline void |
856 | perf_sw_event(u32 event_id, u64 nr, int nmi, | 898 | perf_sw_event(u32 event_id, u64 nr, int nmi, |
857 | struct pt_regs *regs, u64 addr) { } | 899 | struct pt_regs *regs, u64 addr) { } |
900 | static inline void | ||
901 | perf_bp_event(struct perf_event *event, void *data) { } | ||
858 | 902 | ||
859 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } | 903 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } |
860 | static inline void perf_event_comm(struct task_struct *tsk) { } | 904 | static inline void perf_event_comm(struct task_struct *tsk) { } |
861 | static inline void perf_event_fork(struct task_struct *tsk) { } | 905 | static inline void perf_event_fork(struct task_struct *tsk) { } |
862 | static inline void perf_event_init(void) { } | 906 | static inline void perf_event_init(void) { } |
907 | static inline int perf_swevent_get_recursion_context(void) { return -1; } | ||
908 | static inline void perf_swevent_put_recursion_context(int rctx) { } | ||
863 | 909 | ||
864 | #endif | 910 | #endif |
865 | 911 | ||
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4945d1c99864..c3417c13e3ed 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -724,17 +724,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
724 | static void ftrace_profile_##call(proto) \ | 724 | static void ftrace_profile_##call(proto) \ |
725 | { \ | 725 | { \ |
726 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ | 726 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ |
727 | extern int perf_swevent_get_recursion_context(void); \ | ||
728 | extern void perf_swevent_put_recursion_context(int rctx); \ | ||
727 | struct ftrace_event_call *event_call = &event_##call; \ | 729 | struct ftrace_event_call *event_call = &event_##call; \ |
728 | extern void perf_tp_event(int, u64, u64, void *, int); \ | 730 | extern void perf_tp_event(int, u64, u64, void *, int); \ |
729 | struct ftrace_raw_##call *entry; \ | 731 | struct ftrace_raw_##call *entry; \ |
730 | struct perf_trace_buf *trace_buf; \ | ||
731 | u64 __addr = 0, __count = 1; \ | 732 | u64 __addr = 0, __count = 1; \ |
732 | unsigned long irq_flags; \ | 733 | unsigned long irq_flags; \ |
733 | struct trace_entry *ent; \ | 734 | struct trace_entry *ent; \ |
734 | int __entry_size; \ | 735 | int __entry_size; \ |
735 | int __data_size; \ | 736 | int __data_size; \ |
737 | char *trace_buf; \ | ||
736 | char *raw_data; \ | 738 | char *raw_data; \ |
737 | int __cpu; \ | 739 | int __cpu; \ |
740 | int rctx; \ | ||
738 | int pc; \ | 741 | int pc; \ |
739 | \ | 742 | \ |
740 | pc = preempt_count(); \ | 743 | pc = preempt_count(); \ |
@@ -749,6 +752,11 @@ static void ftrace_profile_##call(proto) \ | |||
749 | return; \ | 752 | return; \ |
750 | \ | 753 | \ |
751 | local_irq_save(irq_flags); \ | 754 | local_irq_save(irq_flags); \ |
755 | \ | ||
756 | rctx = perf_swevent_get_recursion_context(); \ | ||
757 | if (rctx < 0) \ | ||
758 | goto end_recursion; \ | ||
759 | \ | ||
752 | __cpu = smp_processor_id(); \ | 760 | __cpu = smp_processor_id(); \ |
753 | \ | 761 | \ |
754 | if (in_nmi()) \ | 762 | if (in_nmi()) \ |
@@ -759,13 +767,7 @@ static void ftrace_profile_##call(proto) \ | |||
759 | if (!trace_buf) \ | 767 | if (!trace_buf) \ |
760 | goto end; \ | 768 | goto end; \ |
761 | \ | 769 | \ |
762 | trace_buf = per_cpu_ptr(trace_buf, __cpu); \ | 770 | raw_data = per_cpu_ptr(trace_buf, __cpu); \ |
763 | if (trace_buf->recursion++) \ | ||
764 | goto end_recursion; \ | ||
765 | \ | ||
766 | barrier(); \ | ||
767 | \ | ||
768 | raw_data = trace_buf->buf; \ | ||
769 | \ | 771 | \ |
770 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ | 772 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ |
771 | entry = (struct ftrace_raw_##call *)raw_data; \ | 773 | entry = (struct ftrace_raw_##call *)raw_data; \ |
@@ -780,9 +782,9 @@ static void ftrace_profile_##call(proto) \ | |||
780 | perf_tp_event(event_call->id, __addr, __count, entry, \ | 782 | perf_tp_event(event_call->id, __addr, __count, entry, \ |
781 | __entry_size); \ | 783 | __entry_size); \ |
782 | \ | 784 | \ |
783 | end_recursion: \ | ||
784 | trace_buf->recursion--; \ | ||
785 | end: \ | 785 | end: \ |
786 | perf_swevent_put_recursion_context(rctx); \ | ||
787 | end_recursion: \ | ||
786 | local_irq_restore(irq_flags); \ | 788 | local_irq_restore(irq_flags); \ |
787 | \ | 789 | \ |
788 | } | 790 | } |
diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8ac0b9..6b7ce8173dfd 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg | |||
21 | CFLAGS_REMOVE_rtmutex-debug.o = -pg | 21 | CFLAGS_REMOVE_rtmutex-debug.o = -pg |
22 | CFLAGS_REMOVE_cgroup-debug.o = -pg | 22 | CFLAGS_REMOVE_cgroup-debug.o = -pg |
23 | CFLAGS_REMOVE_sched_clock.o = -pg | 23 | CFLAGS_REMOVE_sched_clock.o = -pg |
24 | CFLAGS_REMOVE_perf_event.o = -pg | ||
24 | endif | 25 | endif |
25 | 26 | ||
26 | obj-$(CONFIG_FREEZER) += freezer.o | 27 | obj-$(CONFIG_FREEZER) += freezer.o |
@@ -95,6 +96,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ | |||
95 | obj-$(CONFIG_SMP) += sched_cpupri.o | 96 | obj-$(CONFIG_SMP) += sched_cpupri.o |
96 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 97 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
97 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 98 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
99 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | ||
98 | 100 | ||
99 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 101 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
100 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 102 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac2ecc1..3f45e3cf931d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/init_task.h> | 49 | #include <linux/init_task.h> |
50 | #include <linux/perf_event.h> | 50 | #include <linux/perf_event.h> |
51 | #include <trace/events/sched.h> | 51 | #include <trace/events/sched.h> |
52 | #include <linux/hw_breakpoint.h> | ||
52 | 53 | ||
53 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
54 | #include <asm/unistd.h> | 55 | #include <asm/unistd.h> |
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code) | |||
978 | proc_exit_connector(tsk); | 979 | proc_exit_connector(tsk); |
979 | 980 | ||
980 | /* | 981 | /* |
982 | * FIXME: do that only when needed, using sched_exit tracepoint | ||
983 | */ | ||
984 | flush_ptrace_hw_breakpoint(tsk); | ||
985 | /* | ||
981 | * Flush inherited counters to the parent - before the parent | 986 | * Flush inherited counters to the parent - before the parent |
982 | * gets woken up by child-exit notifications. | 987 | * gets woken up by child-exit notifications. |
983 | */ | 988 | */ |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..06d372fc026d --- /dev/null +++ b/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,501 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) IBM Corporation, 2009 | ||
18 | * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Thanks to Ingo Molnar for his many suggestions. | ||
21 | * | ||
22 | * Authors: Alan Stern <stern@rowland.harvard.edu> | ||
23 | * K.Prasad <prasad@linux.vnet.ibm.com> | ||
24 | * Frederic Weisbecker <fweisbec@gmail.com> | ||
25 | */ | ||
26 | |||
27 | /* | ||
28 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
29 | * using the CPU's debug registers. | ||
30 | * This file contains the arch-independent routines. | ||
31 | */ | ||
32 | |||
33 | #include <linux/irqflags.h> | ||
34 | #include <linux/kallsyms.h> | ||
35 | #include <linux/notifier.h> | ||
36 | #include <linux/kprobes.h> | ||
37 | #include <linux/kdebug.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/percpu.h> | ||
41 | #include <linux/sched.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/smp.h> | ||
44 | |||
45 | #include <linux/hw_breakpoint.h> | ||
46 | |||
47 | /* | ||
48 | * Constraints data | ||
49 | */ | ||
50 | |||
51 | /* Number of pinned cpu breakpoints in a cpu */ | ||
52 | static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); | ||
53 | |||
54 | /* Number of pinned task breakpoints in a cpu */ | ||
55 | static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); | ||
56 | |||
57 | /* Number of non-pinned cpu/task breakpoints in a cpu */ | ||
58 | static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); | ||
59 | |||
60 | /* Gather the number of total pinned and un-pinned bp in a cpuset */ | ||
61 | struct bp_busy_slots { | ||
62 | unsigned int pinned; | ||
63 | unsigned int flexible; | ||
64 | }; | ||
65 | |||
66 | /* Serialize accesses to the above constraints */ | ||
67 | static DEFINE_MUTEX(nr_bp_mutex); | ||
68 | |||
69 | /* | ||
70 | * Report the maximum number of pinned breakpoints a task | ||
71 | * have in this cpu | ||
72 | */ | ||
73 | static unsigned int max_task_bp_pinned(int cpu) | ||
74 | { | ||
75 | int i; | ||
76 | unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); | ||
77 | |||
78 | for (i = HBP_NUM -1; i >= 0; i--) { | ||
79 | if (tsk_pinned[i] > 0) | ||
80 | return i + 1; | ||
81 | } | ||
82 | |||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Report the number of pinned/un-pinned breakpoints we have in | ||
88 | * a given cpu (cpu > -1) or in all of them (cpu = -1). | ||
89 | */ | ||
90 | static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) | ||
91 | { | ||
92 | if (cpu >= 0) { | ||
93 | slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); | ||
94 | slots->pinned += max_task_bp_pinned(cpu); | ||
95 | slots->flexible = per_cpu(nr_bp_flexible, cpu); | ||
96 | |||
97 | return; | ||
98 | } | ||
99 | |||
100 | for_each_online_cpu(cpu) { | ||
101 | unsigned int nr; | ||
102 | |||
103 | nr = per_cpu(nr_cpu_bp_pinned, cpu); | ||
104 | nr += max_task_bp_pinned(cpu); | ||
105 | |||
106 | if (nr > slots->pinned) | ||
107 | slots->pinned = nr; | ||
108 | |||
109 | nr = per_cpu(nr_bp_flexible, cpu); | ||
110 | |||
111 | if (nr > slots->flexible) | ||
112 | slots->flexible = nr; | ||
113 | } | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Add a pinned breakpoint for the given task in our constraint table | ||
118 | */ | ||
119 | static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) | ||
120 | { | ||
121 | int count = 0; | ||
122 | struct perf_event *bp; | ||
123 | struct perf_event_context *ctx = tsk->perf_event_ctxp; | ||
124 | unsigned int *task_bp_pinned; | ||
125 | struct list_head *list; | ||
126 | unsigned long flags; | ||
127 | |||
128 | if (WARN_ONCE(!ctx, "No perf context for this task")) | ||
129 | return; | ||
130 | |||
131 | list = &ctx->event_list; | ||
132 | |||
133 | spin_lock_irqsave(&ctx->lock, flags); | ||
134 | |||
135 | /* | ||
136 | * The current breakpoint counter is not included in the list | ||
137 | * at the open() callback time | ||
138 | */ | ||
139 | list_for_each_entry(bp, list, event_entry) { | ||
140 | if (bp->attr.type == PERF_TYPE_BREAKPOINT) | ||
141 | count++; | ||
142 | } | ||
143 | |||
144 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
145 | |||
146 | if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) | ||
147 | return; | ||
148 | |||
149 | task_bp_pinned = per_cpu(task_bp_pinned, cpu); | ||
150 | if (enable) { | ||
151 | task_bp_pinned[count]++; | ||
152 | if (count > 0) | ||
153 | task_bp_pinned[count-1]--; | ||
154 | } else { | ||
155 | task_bp_pinned[count]--; | ||
156 | if (count > 0) | ||
157 | task_bp_pinned[count-1]++; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * Add/remove the given breakpoint in our constraint table | ||
163 | */ | ||
164 | static void toggle_bp_slot(struct perf_event *bp, bool enable) | ||
165 | { | ||
166 | int cpu = bp->cpu; | ||
167 | struct task_struct *tsk = bp->ctx->task; | ||
168 | |||
169 | /* Pinned counter task profiling */ | ||
170 | if (tsk) { | ||
171 | if (cpu >= 0) { | ||
172 | toggle_bp_task_slot(tsk, cpu, enable); | ||
173 | return; | ||
174 | } | ||
175 | |||
176 | for_each_online_cpu(cpu) | ||
177 | toggle_bp_task_slot(tsk, cpu, enable); | ||
178 | return; | ||
179 | } | ||
180 | |||
181 | /* Pinned counter cpu profiling */ | ||
182 | if (enable) | ||
183 | per_cpu(nr_cpu_bp_pinned, bp->cpu)++; | ||
184 | else | ||
185 | per_cpu(nr_cpu_bp_pinned, bp->cpu)--; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Contraints to check before allowing this new breakpoint counter: | ||
190 | * | ||
191 | * == Non-pinned counter == (Considered as pinned for now) | ||
192 | * | ||
193 | * - If attached to a single cpu, check: | ||
194 | * | ||
195 | * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) | ||
196 | * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM | ||
197 | * | ||
198 | * -> If there are already non-pinned counters in this cpu, it means | ||
199 | * there is already a free slot for them. | ||
200 | * Otherwise, we check that the maximum number of per task | ||
201 | * breakpoints (for this cpu) plus the number of per cpu breakpoint | ||
202 | * (for this cpu) doesn't cover every registers. | ||
203 | * | ||
204 | * - If attached to every cpus, check: | ||
205 | * | ||
206 | * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) | ||
207 | * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM | ||
208 | * | ||
209 | * -> This is roughly the same, except we check the number of per cpu | ||
210 | * bp for every cpu and we keep the max one. Same for the per tasks | ||
211 | * breakpoints. | ||
212 | * | ||
213 | * | ||
214 | * == Pinned counter == | ||
215 | * | ||
216 | * - If attached to a single cpu, check: | ||
217 | * | ||
218 | * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) | ||
219 | * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM | ||
220 | * | ||
221 | * -> Same checks as before. But now the nr_bp_flexible, if any, must keep | ||
222 | * one register at least (or they will never be fed). | ||
223 | * | ||
224 | * - If attached to every cpus, check: | ||
225 | * | ||
226 | * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) | ||
227 | * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM | ||
228 | */ | ||
229 | int reserve_bp_slot(struct perf_event *bp) | ||
230 | { | ||
231 | struct bp_busy_slots slots = {0}; | ||
232 | int ret = 0; | ||
233 | |||
234 | mutex_lock(&nr_bp_mutex); | ||
235 | |||
236 | fetch_bp_busy_slots(&slots, bp->cpu); | ||
237 | |||
238 | /* Flexible counters need to keep at least one slot */ | ||
239 | if (slots.pinned + (!!slots.flexible) == HBP_NUM) { | ||
240 | ret = -ENOSPC; | ||
241 | goto end; | ||
242 | } | ||
243 | |||
244 | toggle_bp_slot(bp, true); | ||
245 | |||
246 | end: | ||
247 | mutex_unlock(&nr_bp_mutex); | ||
248 | |||
249 | return ret; | ||
250 | } | ||
251 | |||
252 | void release_bp_slot(struct perf_event *bp) | ||
253 | { | ||
254 | mutex_lock(&nr_bp_mutex); | ||
255 | |||
256 | toggle_bp_slot(bp, false); | ||
257 | |||
258 | mutex_unlock(&nr_bp_mutex); | ||
259 | } | ||
260 | |||
261 | |||
262 | int __register_perf_hw_breakpoint(struct perf_event *bp) | ||
263 | { | ||
264 | int ret; | ||
265 | |||
266 | ret = reserve_bp_slot(bp); | ||
267 | if (ret) | ||
268 | return ret; | ||
269 | |||
270 | /* | ||
271 | * Ptrace breakpoints can be temporary perf events only | ||
272 | * meant to reserve a slot. In this case, it is created disabled and | ||
273 | * we don't want to check the params right now (as we put a null addr) | ||
274 | * But perf tools create events as disabled and we want to check | ||
275 | * the params for them. | ||
276 | * This is a quick hack that will be removed soon, once we remove | ||
277 | * the tmp breakpoints from ptrace | ||
278 | */ | ||
279 | if (!bp->attr.disabled || bp->callback == perf_bp_event) | ||
280 | ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); | ||
281 | |||
282 | return ret; | ||
283 | } | ||
284 | |||
285 | int register_perf_hw_breakpoint(struct perf_event *bp) | ||
286 | { | ||
287 | bp->callback = perf_bp_event; | ||
288 | |||
289 | return __register_perf_hw_breakpoint(bp); | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * Register a breakpoint bound to a task and a given cpu. | ||
294 | * If cpu is -1, the breakpoint is active for the task in every cpu | ||
295 | * If the task is -1, the breakpoint is active for every tasks in the given | ||
296 | * cpu. | ||
297 | */ | ||
298 | static struct perf_event * | ||
299 | register_user_hw_breakpoint_cpu(unsigned long addr, | ||
300 | int len, | ||
301 | int type, | ||
302 | perf_callback_t triggered, | ||
303 | pid_t pid, | ||
304 | int cpu, | ||
305 | bool active) | ||
306 | { | ||
307 | struct perf_event_attr *attr; | ||
308 | struct perf_event *bp; | ||
309 | |||
310 | attr = kzalloc(sizeof(*attr), GFP_KERNEL); | ||
311 | if (!attr) | ||
312 | return ERR_PTR(-ENOMEM); | ||
313 | |||
314 | attr->type = PERF_TYPE_BREAKPOINT; | ||
315 | attr->size = sizeof(*attr); | ||
316 | attr->bp_addr = addr; | ||
317 | attr->bp_len = len; | ||
318 | attr->bp_type = type; | ||
319 | /* | ||
320 | * Such breakpoints are used by debuggers to trigger signals when | ||
321 | * we hit the excepted memory op. We can't miss such events, they | ||
322 | * must be pinned. | ||
323 | */ | ||
324 | attr->pinned = 1; | ||
325 | |||
326 | if (!active) | ||
327 | attr->disabled = 1; | ||
328 | |||
329 | bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); | ||
330 | kfree(attr); | ||
331 | |||
332 | return bp; | ||
333 | } | ||
334 | |||
335 | /** | ||
336 | * register_user_hw_breakpoint - register a hardware breakpoint for user space | ||
337 | * @addr: is the memory address that triggers the breakpoint | ||
338 | * @len: the length of the access to the memory (1 byte, 2 bytes etc...) | ||
339 | * @type: the type of the access to the memory (read/write/exec) | ||
340 | * @triggered: callback to trigger when we hit the breakpoint | ||
341 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
342 | * @active: should we activate it while registering it | ||
343 | * | ||
344 | */ | ||
345 | struct perf_event * | ||
346 | register_user_hw_breakpoint(unsigned long addr, | ||
347 | int len, | ||
348 | int type, | ||
349 | perf_callback_t triggered, | ||
350 | struct task_struct *tsk, | ||
351 | bool active) | ||
352 | { | ||
353 | return register_user_hw_breakpoint_cpu(addr, len, type, triggered, | ||
354 | tsk->pid, -1, active); | ||
355 | } | ||
356 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | ||
357 | |||
358 | /** | ||
359 | * modify_user_hw_breakpoint - modify a user-space hardware breakpoint | ||
360 | * @bp: the breakpoint structure to modify | ||
361 | * @addr: is the memory address that triggers the breakpoint | ||
362 | * @len: the length of the access to the memory (1 byte, 2 bytes etc...) | ||
363 | * @type: the type of the access to the memory (read/write/exec) | ||
364 | * @triggered: callback to trigger when we hit the breakpoint | ||
365 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
366 | * @active: should we activate it while registering it | ||
367 | */ | ||
368 | struct perf_event * | ||
369 | modify_user_hw_breakpoint(struct perf_event *bp, | ||
370 | unsigned long addr, | ||
371 | int len, | ||
372 | int type, | ||
373 | perf_callback_t triggered, | ||
374 | struct task_struct *tsk, | ||
375 | bool active) | ||
376 | { | ||
377 | /* | ||
378 | * FIXME: do it without unregistering | ||
379 | * - We don't want to lose our slot | ||
380 | * - If the new bp is incorrect, don't lose the older one | ||
381 | */ | ||
382 | unregister_hw_breakpoint(bp); | ||
383 | |||
384 | return register_user_hw_breakpoint(addr, len, type, triggered, | ||
385 | tsk, active); | ||
386 | } | ||
387 | EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); | ||
388 | |||
389 | /** | ||
390 | * unregister_hw_breakpoint - unregister a user-space hardware breakpoint | ||
391 | * @bp: the breakpoint structure to unregister | ||
392 | */ | ||
393 | void unregister_hw_breakpoint(struct perf_event *bp) | ||
394 | { | ||
395 | if (!bp) | ||
396 | return; | ||
397 | perf_event_release_kernel(bp); | ||
398 | } | ||
399 | EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); | ||
400 | |||
401 | static struct perf_event * | ||
402 | register_kernel_hw_breakpoint_cpu(unsigned long addr, | ||
403 | int len, | ||
404 | int type, | ||
405 | perf_callback_t triggered, | ||
406 | int cpu, | ||
407 | bool active) | ||
408 | { | ||
409 | return register_user_hw_breakpoint_cpu(addr, len, type, triggered, | ||
410 | -1, cpu, active); | ||
411 | } | ||
412 | |||
413 | /** | ||
414 | * register_wide_hw_breakpoint - register a wide breakpoint in the kernel | ||
415 | * @addr: is the memory address that triggers the breakpoint | ||
416 | * @len: the length of the access to the memory (1 byte, 2 bytes etc...) | ||
417 | * @type: the type of the access to the memory (read/write/exec) | ||
418 | * @triggered: callback to trigger when we hit the breakpoint | ||
419 | * @active: should we activate it while registering it | ||
420 | * | ||
421 | * @return a set of per_cpu pointers to perf events | ||
422 | */ | ||
423 | struct perf_event ** | ||
424 | register_wide_hw_breakpoint(unsigned long addr, | ||
425 | int len, | ||
426 | int type, | ||
427 | perf_callback_t triggered, | ||
428 | bool active) | ||
429 | { | ||
430 | struct perf_event **cpu_events, **pevent, *bp; | ||
431 | long err; | ||
432 | int cpu; | ||
433 | |||
434 | cpu_events = alloc_percpu(typeof(*cpu_events)); | ||
435 | if (!cpu_events) | ||
436 | return ERR_PTR(-ENOMEM); | ||
437 | |||
438 | for_each_possible_cpu(cpu) { | ||
439 | pevent = per_cpu_ptr(cpu_events, cpu); | ||
440 | bp = register_kernel_hw_breakpoint_cpu(addr, len, type, | ||
441 | triggered, cpu, active); | ||
442 | |||
443 | *pevent = bp; | ||
444 | |||
445 | if (IS_ERR(bp) || !bp) { | ||
446 | err = PTR_ERR(bp); | ||
447 | goto fail; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | return cpu_events; | ||
452 | |||
453 | fail: | ||
454 | for_each_possible_cpu(cpu) { | ||
455 | pevent = per_cpu_ptr(cpu_events, cpu); | ||
456 | if (IS_ERR(*pevent) || !*pevent) | ||
457 | break; | ||
458 | unregister_hw_breakpoint(*pevent); | ||
459 | } | ||
460 | free_percpu(cpu_events); | ||
461 | /* return the error if any */ | ||
462 | return ERR_PTR(err); | ||
463 | } | ||
464 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); | ||
465 | |||
466 | /** | ||
467 | * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel | ||
468 | * @cpu_events: the per cpu set of events to unregister | ||
469 | */ | ||
470 | void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) | ||
471 | { | ||
472 | int cpu; | ||
473 | struct perf_event **pevent; | ||
474 | |||
475 | for_each_possible_cpu(cpu) { | ||
476 | pevent = per_cpu_ptr(cpu_events, cpu); | ||
477 | unregister_hw_breakpoint(*pevent); | ||
478 | } | ||
479 | free_percpu(cpu_events); | ||
480 | } | ||
481 | EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); | ||
482 | |||
483 | static struct notifier_block hw_breakpoint_exceptions_nb = { | ||
484 | .notifier_call = hw_breakpoint_exceptions_notify, | ||
485 | /* we need to be notified first */ | ||
486 | .priority = 0x7fffffff | ||
487 | }; | ||
488 | |||
489 | static int __init init_hw_breakpoint(void) | ||
490 | { | ||
491 | return register_die_notifier(&hw_breakpoint_exceptions_nb); | ||
492 | } | ||
493 | core_initcall(init_hw_breakpoint); | ||
494 | |||
495 | |||
496 | struct pmu perf_ops_bp = { | ||
497 | .enable = arch_install_hw_breakpoint, | ||
498 | .disable = arch_uninstall_hw_breakpoint, | ||
499 | .read = hw_breakpoint_pmu_read, | ||
500 | .unthrottle = hw_breakpoint_pmu_unthrottle | ||
501 | }; | ||
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8b6b8b697c68..8e5288a8a355 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name) | |||
181 | } | 181 | } |
182 | return module_kallsyms_lookup_name(name); | 182 | return module_kallsyms_lookup_name(name); |
183 | } | 183 | } |
184 | EXPORT_SYMBOL_GPL(kallsyms_lookup_name); | ||
184 | 185 | ||
185 | int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, | 186 | int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, |
186 | unsigned long), | 187 | unsigned long), |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3256e36ad251..accfd7bfe387 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/kernel_stat.h> | 29 | #include <linux/kernel_stat.h> |
30 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | #include <linux/ftrace_event.h> | 31 | #include <linux/ftrace_event.h> |
32 | #include <linux/hw_breakpoint.h> | ||
32 | 33 | ||
33 | #include <asm/irq_regs.h> | 34 | #include <asm/irq_regs.h> |
34 | 35 | ||
@@ -245,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
245 | put_ctx(ctx); | 246 | put_ctx(ctx); |
246 | } | 247 | } |
247 | 248 | ||
249 | static inline u64 perf_clock(void) | ||
250 | { | ||
251 | return cpu_clock(smp_processor_id()); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Update the record of the current time in a context. | ||
256 | */ | ||
257 | static void update_context_time(struct perf_event_context *ctx) | ||
258 | { | ||
259 | u64 now = perf_clock(); | ||
260 | |||
261 | ctx->time += now - ctx->timestamp; | ||
262 | ctx->timestamp = now; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Update the total_time_enabled and total_time_running fields for a event. | ||
267 | */ | ||
268 | static void update_event_times(struct perf_event *event) | ||
269 | { | ||
270 | struct perf_event_context *ctx = event->ctx; | ||
271 | u64 run_end; | ||
272 | |||
273 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
274 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
275 | return; | ||
276 | |||
277 | if (ctx->is_active) | ||
278 | run_end = ctx->time; | ||
279 | else | ||
280 | run_end = event->tstamp_stopped; | ||
281 | |||
282 | event->total_time_enabled = run_end - event->tstamp_enabled; | ||
283 | |||
284 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
285 | run_end = event->tstamp_stopped; | ||
286 | else | ||
287 | run_end = ctx->time; | ||
288 | |||
289 | event->total_time_running = run_end - event->tstamp_running; | ||
290 | } | ||
291 | |||
248 | /* | 292 | /* |
249 | * Add a event from the lists for its context. | 293 | * Add a event from the lists for its context. |
250 | * Must be called with ctx->mutex and ctx->lock held. | 294 | * Must be called with ctx->mutex and ctx->lock held. |
@@ -293,6 +337,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
293 | if (event->group_leader != event) | 337 | if (event->group_leader != event) |
294 | event->group_leader->nr_siblings--; | 338 | event->group_leader->nr_siblings--; |
295 | 339 | ||
340 | update_event_times(event); | ||
341 | event->state = PERF_EVENT_STATE_OFF; | ||
342 | |||
296 | /* | 343 | /* |
297 | * If this was a group event with sibling events then | 344 | * If this was a group event with sibling events then |
298 | * upgrade the siblings to singleton events by adding them | 345 | * upgrade the siblings to singleton events by adding them |
@@ -446,50 +493,11 @@ retry: | |||
446 | * can remove the event safely, if the call above did not | 493 | * can remove the event safely, if the call above did not |
447 | * succeed. | 494 | * succeed. |
448 | */ | 495 | */ |
449 | if (!list_empty(&event->group_entry)) { | 496 | if (!list_empty(&event->group_entry)) |
450 | list_del_event(event, ctx); | 497 | list_del_event(event, ctx); |
451 | } | ||
452 | spin_unlock_irq(&ctx->lock); | 498 | spin_unlock_irq(&ctx->lock); |
453 | } | 499 | } |
454 | 500 | ||
455 | static inline u64 perf_clock(void) | ||
456 | { | ||
457 | return cpu_clock(smp_processor_id()); | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Update the record of the current time in a context. | ||
462 | */ | ||
463 | static void update_context_time(struct perf_event_context *ctx) | ||
464 | { | ||
465 | u64 now = perf_clock(); | ||
466 | |||
467 | ctx->time += now - ctx->timestamp; | ||
468 | ctx->timestamp = now; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Update the total_time_enabled and total_time_running fields for a event. | ||
473 | */ | ||
474 | static void update_event_times(struct perf_event *event) | ||
475 | { | ||
476 | struct perf_event_context *ctx = event->ctx; | ||
477 | u64 run_end; | ||
478 | |||
479 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
480 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
481 | return; | ||
482 | |||
483 | event->total_time_enabled = ctx->time - event->tstamp_enabled; | ||
484 | |||
485 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
486 | run_end = event->tstamp_stopped; | ||
487 | else | ||
488 | run_end = ctx->time; | ||
489 | |||
490 | event->total_time_running = run_end - event->tstamp_running; | ||
491 | } | ||
492 | |||
493 | /* | 501 | /* |
494 | * Update total_time_enabled and total_time_running for all events in a group. | 502 | * Update total_time_enabled and total_time_running for all events in a group. |
495 | */ | 503 | */ |
@@ -1032,10 +1040,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1032 | update_context_time(ctx); | 1040 | update_context_time(ctx); |
1033 | 1041 | ||
1034 | perf_disable(); | 1042 | perf_disable(); |
1035 | if (ctx->nr_active) | 1043 | if (ctx->nr_active) { |
1036 | list_for_each_entry(event, &ctx->group_list, group_entry) | 1044 | list_for_each_entry(event, &ctx->group_list, group_entry) |
1037 | group_sched_out(event, cpuctx, ctx); | 1045 | group_sched_out(event, cpuctx, ctx); |
1038 | 1046 | } | |
1039 | perf_enable(); | 1047 | perf_enable(); |
1040 | out: | 1048 | out: |
1041 | spin_unlock(&ctx->lock); | 1049 | spin_unlock(&ctx->lock); |
@@ -1060,8 +1068,6 @@ static int context_equiv(struct perf_event_context *ctx1, | |||
1060 | && !ctx1->pin_count && !ctx2->pin_count; | 1068 | && !ctx1->pin_count && !ctx2->pin_count; |
1061 | } | 1069 | } |
1062 | 1070 | ||
1063 | static void __perf_event_read(void *event); | ||
1064 | |||
1065 | static void __perf_event_sync_stat(struct perf_event *event, | 1071 | static void __perf_event_sync_stat(struct perf_event *event, |
1066 | struct perf_event *next_event) | 1072 | struct perf_event *next_event) |
1067 | { | 1073 | { |
@@ -1079,8 +1085,8 @@ static void __perf_event_sync_stat(struct perf_event *event, | |||
1079 | */ | 1085 | */ |
1080 | switch (event->state) { | 1086 | switch (event->state) { |
1081 | case PERF_EVENT_STATE_ACTIVE: | 1087 | case PERF_EVENT_STATE_ACTIVE: |
1082 | __perf_event_read(event); | 1088 | event->pmu->read(event); |
1083 | break; | 1089 | /* fall-through */ |
1084 | 1090 | ||
1085 | case PERF_EVENT_STATE_INACTIVE: | 1091 | case PERF_EVENT_STATE_INACTIVE: |
1086 | update_event_times(event); | 1092 | update_event_times(event); |
@@ -1119,6 +1125,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1119 | if (!ctx->nr_stat) | 1125 | if (!ctx->nr_stat) |
1120 | return; | 1126 | return; |
1121 | 1127 | ||
1128 | update_context_time(ctx); | ||
1129 | |||
1122 | event = list_first_entry(&ctx->event_list, | 1130 | event = list_first_entry(&ctx->event_list, |
1123 | struct perf_event, event_entry); | 1131 | struct perf_event, event_entry); |
1124 | 1132 | ||
@@ -1162,8 +1170,6 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1162 | if (likely(!ctx || !cpuctx->task_ctx)) | 1170 | if (likely(!ctx || !cpuctx->task_ctx)) |
1163 | return; | 1171 | return; |
1164 | 1172 | ||
1165 | update_context_time(ctx); | ||
1166 | |||
1167 | rcu_read_lock(); | 1173 | rcu_read_lock(); |
1168 | parent = rcu_dereference(ctx->parent_ctx); | 1174 | parent = rcu_dereference(ctx->parent_ctx); |
1169 | next_ctx = next->perf_event_ctxp; | 1175 | next_ctx = next->perf_event_ctxp; |
@@ -1516,7 +1522,6 @@ static void __perf_event_read(void *info) | |||
1516 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1522 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1517 | struct perf_event *event = info; | 1523 | struct perf_event *event = info; |
1518 | struct perf_event_context *ctx = event->ctx; | 1524 | struct perf_event_context *ctx = event->ctx; |
1519 | unsigned long flags; | ||
1520 | 1525 | ||
1521 | /* | 1526 | /* |
1522 | * If this is a task context, we need to check whether it is | 1527 | * If this is a task context, we need to check whether it is |
@@ -1528,12 +1533,12 @@ static void __perf_event_read(void *info) | |||
1528 | if (ctx->task && cpuctx->task_ctx != ctx) | 1533 | if (ctx->task && cpuctx->task_ctx != ctx) |
1529 | return; | 1534 | return; |
1530 | 1535 | ||
1531 | local_irq_save(flags); | 1536 | spin_lock(&ctx->lock); |
1532 | if (ctx->is_active) | 1537 | update_context_time(ctx); |
1533 | update_context_time(ctx); | ||
1534 | event->pmu->read(event); | ||
1535 | update_event_times(event); | 1538 | update_event_times(event); |
1536 | local_irq_restore(flags); | 1539 | spin_unlock(&ctx->lock); |
1540 | |||
1541 | event->pmu->read(event); | ||
1537 | } | 1542 | } |
1538 | 1543 | ||
1539 | static u64 perf_event_read(struct perf_event *event) | 1544 | static u64 perf_event_read(struct perf_event *event) |
@@ -1546,7 +1551,13 @@ static u64 perf_event_read(struct perf_event *event) | |||
1546 | smp_call_function_single(event->oncpu, | 1551 | smp_call_function_single(event->oncpu, |
1547 | __perf_event_read, event, 1); | 1552 | __perf_event_read, event, 1); |
1548 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { | 1553 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { |
1554 | struct perf_event_context *ctx = event->ctx; | ||
1555 | unsigned long flags; | ||
1556 | |||
1557 | spin_lock_irqsave(&ctx->lock, flags); | ||
1558 | update_context_time(ctx); | ||
1549 | update_event_times(event); | 1559 | update_event_times(event); |
1560 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
1550 | } | 1561 | } |
1551 | 1562 | ||
1552 | return atomic64_read(&event->count); | 1563 | return atomic64_read(&event->count); |
@@ -1700,16 +1711,10 @@ static void free_event(struct perf_event *event) | |||
1700 | call_rcu(&event->rcu_head, free_event_rcu); | 1711 | call_rcu(&event->rcu_head, free_event_rcu); |
1701 | } | 1712 | } |
1702 | 1713 | ||
1703 | /* | 1714 | int perf_event_release_kernel(struct perf_event *event) |
1704 | * Called when the last reference to the file is gone. | ||
1705 | */ | ||
1706 | static int perf_release(struct inode *inode, struct file *file) | ||
1707 | { | 1715 | { |
1708 | struct perf_event *event = file->private_data; | ||
1709 | struct perf_event_context *ctx = event->ctx; | 1716 | struct perf_event_context *ctx = event->ctx; |
1710 | 1717 | ||
1711 | file->private_data = NULL; | ||
1712 | |||
1713 | WARN_ON_ONCE(ctx->parent_ctx); | 1718 | WARN_ON_ONCE(ctx->parent_ctx); |
1714 | mutex_lock(&ctx->mutex); | 1719 | mutex_lock(&ctx->mutex); |
1715 | perf_event_remove_from_context(event); | 1720 | perf_event_remove_from_context(event); |
@@ -1724,6 +1729,19 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1724 | 1729 | ||
1725 | return 0; | 1730 | return 0; |
1726 | } | 1731 | } |
1732 | EXPORT_SYMBOL_GPL(perf_event_release_kernel); | ||
1733 | |||
1734 | /* | ||
1735 | * Called when the last reference to the file is gone. | ||
1736 | */ | ||
1737 | static int perf_release(struct inode *inode, struct file *file) | ||
1738 | { | ||
1739 | struct perf_event *event = file->private_data; | ||
1740 | |||
1741 | file->private_data = NULL; | ||
1742 | |||
1743 | return perf_event_release_kernel(event); | ||
1744 | } | ||
1727 | 1745 | ||
1728 | static int perf_event_read_size(struct perf_event *event) | 1746 | static int perf_event_read_size(struct perf_event *event) |
1729 | { | 1747 | { |
@@ -1750,91 +1768,94 @@ static int perf_event_read_size(struct perf_event *event) | |||
1750 | return size; | 1768 | return size; |
1751 | } | 1769 | } |
1752 | 1770 | ||
1753 | static u64 perf_event_read_value(struct perf_event *event) | 1771 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
1754 | { | 1772 | { |
1755 | struct perf_event *child; | 1773 | struct perf_event *child; |
1756 | u64 total = 0; | 1774 | u64 total = 0; |
1757 | 1775 | ||
1776 | *enabled = 0; | ||
1777 | *running = 0; | ||
1778 | |||
1779 | mutex_lock(&event->child_mutex); | ||
1758 | total += perf_event_read(event); | 1780 | total += perf_event_read(event); |
1759 | list_for_each_entry(child, &event->child_list, child_list) | 1781 | *enabled += event->total_time_enabled + |
1782 | atomic64_read(&event->child_total_time_enabled); | ||
1783 | *running += event->total_time_running + | ||
1784 | atomic64_read(&event->child_total_time_running); | ||
1785 | |||
1786 | list_for_each_entry(child, &event->child_list, child_list) { | ||
1760 | total += perf_event_read(child); | 1787 | total += perf_event_read(child); |
1788 | *enabled += child->total_time_enabled; | ||
1789 | *running += child->total_time_running; | ||
1790 | } | ||
1791 | mutex_unlock(&event->child_mutex); | ||
1761 | 1792 | ||
1762 | return total; | 1793 | return total; |
1763 | } | 1794 | } |
1764 | 1795 | EXPORT_SYMBOL_GPL(perf_event_read_value); | |
1765 | static int perf_event_read_entry(struct perf_event *event, | ||
1766 | u64 read_format, char __user *buf) | ||
1767 | { | ||
1768 | int n = 0, count = 0; | ||
1769 | u64 values[2]; | ||
1770 | |||
1771 | values[n++] = perf_event_read_value(event); | ||
1772 | if (read_format & PERF_FORMAT_ID) | ||
1773 | values[n++] = primary_event_id(event); | ||
1774 | |||
1775 | count = n * sizeof(u64); | ||
1776 | |||
1777 | if (copy_to_user(buf, values, count)) | ||
1778 | return -EFAULT; | ||
1779 | |||
1780 | return count; | ||
1781 | } | ||
1782 | 1796 | ||
1783 | static int perf_event_read_group(struct perf_event *event, | 1797 | static int perf_event_read_group(struct perf_event *event, |
1784 | u64 read_format, char __user *buf) | 1798 | u64 read_format, char __user *buf) |
1785 | { | 1799 | { |
1786 | struct perf_event *leader = event->group_leader, *sub; | 1800 | struct perf_event *leader = event->group_leader, *sub; |
1787 | int n = 0, size = 0, err = -EFAULT; | 1801 | int n = 0, size = 0, ret = -EFAULT; |
1788 | u64 values[3]; | 1802 | struct perf_event_context *ctx = leader->ctx; |
1803 | u64 values[5]; | ||
1804 | u64 count, enabled, running; | ||
1805 | |||
1806 | mutex_lock(&ctx->mutex); | ||
1807 | count = perf_event_read_value(leader, &enabled, &running); | ||
1789 | 1808 | ||
1790 | values[n++] = 1 + leader->nr_siblings; | 1809 | values[n++] = 1 + leader->nr_siblings; |
1791 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1810 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1792 | values[n++] = leader->total_time_enabled + | 1811 | values[n++] = enabled; |
1793 | atomic64_read(&leader->child_total_time_enabled); | 1812 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1794 | } | 1813 | values[n++] = running; |
1795 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 1814 | values[n++] = count; |
1796 | values[n++] = leader->total_time_running + | 1815 | if (read_format & PERF_FORMAT_ID) |
1797 | atomic64_read(&leader->child_total_time_running); | 1816 | values[n++] = primary_event_id(leader); |
1798 | } | ||
1799 | 1817 | ||
1800 | size = n * sizeof(u64); | 1818 | size = n * sizeof(u64); |
1801 | 1819 | ||
1802 | if (copy_to_user(buf, values, size)) | 1820 | if (copy_to_user(buf, values, size)) |
1803 | return -EFAULT; | 1821 | goto unlock; |
1804 | |||
1805 | err = perf_event_read_entry(leader, read_format, buf + size); | ||
1806 | if (err < 0) | ||
1807 | return err; | ||
1808 | 1822 | ||
1809 | size += err; | 1823 | ret = size; |
1810 | 1824 | ||
1811 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 1825 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
1812 | err = perf_event_read_entry(sub, read_format, | 1826 | n = 0; |
1813 | buf + size); | ||
1814 | if (err < 0) | ||
1815 | return err; | ||
1816 | 1827 | ||
1817 | size += err; | 1828 | values[n++] = perf_event_read_value(sub, &enabled, &running); |
1829 | if (read_format & PERF_FORMAT_ID) | ||
1830 | values[n++] = primary_event_id(sub); | ||
1831 | |||
1832 | size = n * sizeof(u64); | ||
1833 | |||
1834 | if (copy_to_user(buf + ret, values, size)) { | ||
1835 | ret = -EFAULT; | ||
1836 | goto unlock; | ||
1837 | } | ||
1838 | |||
1839 | ret += size; | ||
1818 | } | 1840 | } |
1841 | unlock: | ||
1842 | mutex_unlock(&ctx->mutex); | ||
1819 | 1843 | ||
1820 | return size; | 1844 | return ret; |
1821 | } | 1845 | } |
1822 | 1846 | ||
1823 | static int perf_event_read_one(struct perf_event *event, | 1847 | static int perf_event_read_one(struct perf_event *event, |
1824 | u64 read_format, char __user *buf) | 1848 | u64 read_format, char __user *buf) |
1825 | { | 1849 | { |
1850 | u64 enabled, running; | ||
1826 | u64 values[4]; | 1851 | u64 values[4]; |
1827 | int n = 0; | 1852 | int n = 0; |
1828 | 1853 | ||
1829 | values[n++] = perf_event_read_value(event); | 1854 | values[n++] = perf_event_read_value(event, &enabled, &running); |
1830 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1855 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1831 | values[n++] = event->total_time_enabled + | 1856 | values[n++] = enabled; |
1832 | atomic64_read(&event->child_total_time_enabled); | 1857 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1833 | } | 1858 | values[n++] = running; |
1834 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | ||
1835 | values[n++] = event->total_time_running + | ||
1836 | atomic64_read(&event->child_total_time_running); | ||
1837 | } | ||
1838 | if (read_format & PERF_FORMAT_ID) | 1859 | if (read_format & PERF_FORMAT_ID) |
1839 | values[n++] = primary_event_id(event); | 1860 | values[n++] = primary_event_id(event); |
1840 | 1861 | ||
@@ -1865,12 +1886,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
1865 | return -ENOSPC; | 1886 | return -ENOSPC; |
1866 | 1887 | ||
1867 | WARN_ON_ONCE(event->ctx->parent_ctx); | 1888 | WARN_ON_ONCE(event->ctx->parent_ctx); |
1868 | mutex_lock(&event->child_mutex); | ||
1869 | if (read_format & PERF_FORMAT_GROUP) | 1889 | if (read_format & PERF_FORMAT_GROUP) |
1870 | ret = perf_event_read_group(event, read_format, buf); | 1890 | ret = perf_event_read_group(event, read_format, buf); |
1871 | else | 1891 | else |
1872 | ret = perf_event_read_one(event, read_format, buf); | 1892 | ret = perf_event_read_one(event, read_format, buf); |
1873 | mutex_unlock(&event->child_mutex); | ||
1874 | 1893 | ||
1875 | return ret; | 1894 | return ret; |
1876 | } | 1895 | } |
@@ -2315,7 +2334,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) | |||
2315 | } | 2334 | } |
2316 | 2335 | ||
2317 | if (!data->watermark) | 2336 | if (!data->watermark) |
2318 | data->watermark = max_t(long, PAGE_SIZE, max_size / 2); | 2337 | data->watermark = max_size / 2; |
2319 | 2338 | ||
2320 | 2339 | ||
2321 | rcu_assign_pointer(event->data, data); | 2340 | rcu_assign_pointer(event->data, data); |
@@ -3245,15 +3264,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3245 | { | 3264 | { |
3246 | struct perf_event *event; | 3265 | struct perf_event *event; |
3247 | 3266 | ||
3248 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3249 | return; | ||
3250 | |||
3251 | rcu_read_lock(); | ||
3252 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3267 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3253 | if (perf_event_task_match(event)) | 3268 | if (perf_event_task_match(event)) |
3254 | perf_event_task_output(event, task_event); | 3269 | perf_event_task_output(event, task_event); |
3255 | } | 3270 | } |
3256 | rcu_read_unlock(); | ||
3257 | } | 3271 | } |
3258 | 3272 | ||
3259 | static void perf_event_task_event(struct perf_task_event *task_event) | 3273 | static void perf_event_task_event(struct perf_task_event *task_event) |
@@ -3261,11 +3275,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3261 | struct perf_cpu_context *cpuctx; | 3275 | struct perf_cpu_context *cpuctx; |
3262 | struct perf_event_context *ctx = task_event->task_ctx; | 3276 | struct perf_event_context *ctx = task_event->task_ctx; |
3263 | 3277 | ||
3278 | rcu_read_lock(); | ||
3264 | cpuctx = &get_cpu_var(perf_cpu_context); | 3279 | cpuctx = &get_cpu_var(perf_cpu_context); |
3265 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3280 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3266 | put_cpu_var(perf_cpu_context); | 3281 | put_cpu_var(perf_cpu_context); |
3267 | 3282 | ||
3268 | rcu_read_lock(); | ||
3269 | if (!ctx) | 3283 | if (!ctx) |
3270 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); | 3284 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); |
3271 | if (ctx) | 3285 | if (ctx) |
@@ -3357,15 +3371,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, | |||
3357 | { | 3371 | { |
3358 | struct perf_event *event; | 3372 | struct perf_event *event; |
3359 | 3373 | ||
3360 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3361 | return; | ||
3362 | |||
3363 | rcu_read_lock(); | ||
3364 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3374 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3365 | if (perf_event_comm_match(event)) | 3375 | if (perf_event_comm_match(event)) |
3366 | perf_event_comm_output(event, comm_event); | 3376 | perf_event_comm_output(event, comm_event); |
3367 | } | 3377 | } |
3368 | rcu_read_unlock(); | ||
3369 | } | 3378 | } |
3370 | 3379 | ||
3371 | static void perf_event_comm_event(struct perf_comm_event *comm_event) | 3380 | static void perf_event_comm_event(struct perf_comm_event *comm_event) |
@@ -3376,7 +3385,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3376 | char comm[TASK_COMM_LEN]; | 3385 | char comm[TASK_COMM_LEN]; |
3377 | 3386 | ||
3378 | memset(comm, 0, sizeof(comm)); | 3387 | memset(comm, 0, sizeof(comm)); |
3379 | strncpy(comm, comm_event->task->comm, sizeof(comm)); | 3388 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
3380 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | 3389 | size = ALIGN(strlen(comm)+1, sizeof(u64)); |
3381 | 3390 | ||
3382 | comm_event->comm = comm; | 3391 | comm_event->comm = comm; |
@@ -3384,11 +3393,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3384 | 3393 | ||
3385 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3394 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3386 | 3395 | ||
3396 | rcu_read_lock(); | ||
3387 | cpuctx = &get_cpu_var(perf_cpu_context); | 3397 | cpuctx = &get_cpu_var(perf_cpu_context); |
3388 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3398 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3389 | put_cpu_var(perf_cpu_context); | 3399 | put_cpu_var(perf_cpu_context); |
3390 | 3400 | ||
3391 | rcu_read_lock(); | ||
3392 | /* | 3401 | /* |
3393 | * doesn't really matter which of the child contexts the | 3402 | * doesn't really matter which of the child contexts the |
3394 | * events ends up in. | 3403 | * events ends up in. |
@@ -3481,15 +3490,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, | |||
3481 | { | 3490 | { |
3482 | struct perf_event *event; | 3491 | struct perf_event *event; |
3483 | 3492 | ||
3484 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3485 | return; | ||
3486 | |||
3487 | rcu_read_lock(); | ||
3488 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3493 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3489 | if (perf_event_mmap_match(event, mmap_event)) | 3494 | if (perf_event_mmap_match(event, mmap_event)) |
3490 | perf_event_mmap_output(event, mmap_event); | 3495 | perf_event_mmap_output(event, mmap_event); |
3491 | } | 3496 | } |
3492 | rcu_read_unlock(); | ||
3493 | } | 3497 | } |
3494 | 3498 | ||
3495 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | 3499 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) |
@@ -3545,11 +3549,11 @@ got_name: | |||
3545 | 3549 | ||
3546 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 3550 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
3547 | 3551 | ||
3552 | rcu_read_lock(); | ||
3548 | cpuctx = &get_cpu_var(perf_cpu_context); | 3553 | cpuctx = &get_cpu_var(perf_cpu_context); |
3549 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); | 3554 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); |
3550 | put_cpu_var(perf_cpu_context); | 3555 | put_cpu_var(perf_cpu_context); |
3551 | 3556 | ||
3552 | rcu_read_lock(); | ||
3553 | /* | 3557 | /* |
3554 | * doesn't really matter which of the child contexts the | 3558 | * doesn't really matter which of the child contexts the |
3555 | * events ends up in. | 3559 | * events ends up in. |
@@ -3688,7 +3692,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3688 | perf_event_disable(event); | 3692 | perf_event_disable(event); |
3689 | } | 3693 | } |
3690 | 3694 | ||
3691 | perf_event_output(event, nmi, data, regs); | 3695 | if (event->overflow_handler) |
3696 | event->overflow_handler(event, nmi, data, regs); | ||
3697 | else | ||
3698 | perf_event_output(event, nmi, data, regs); | ||
3699 | |||
3692 | return ret; | 3700 | return ret; |
3693 | } | 3701 | } |
3694 | 3702 | ||
@@ -3733,16 +3741,16 @@ again: | |||
3733 | return nr; | 3741 | return nr; |
3734 | } | 3742 | } |
3735 | 3743 | ||
3736 | static void perf_swevent_overflow(struct perf_event *event, | 3744 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, |
3737 | int nmi, struct perf_sample_data *data, | 3745 | int nmi, struct perf_sample_data *data, |
3738 | struct pt_regs *regs) | 3746 | struct pt_regs *regs) |
3739 | { | 3747 | { |
3740 | struct hw_perf_event *hwc = &event->hw; | 3748 | struct hw_perf_event *hwc = &event->hw; |
3741 | int throttle = 0; | 3749 | int throttle = 0; |
3742 | u64 overflow; | ||
3743 | 3750 | ||
3744 | data->period = event->hw.last_period; | 3751 | data->period = event->hw.last_period; |
3745 | overflow = perf_swevent_set_period(event); | 3752 | if (!overflow) |
3753 | overflow = perf_swevent_set_period(event); | ||
3746 | 3754 | ||
3747 | if (hwc->interrupts == MAX_INTERRUPTS) | 3755 | if (hwc->interrupts == MAX_INTERRUPTS) |
3748 | return; | 3756 | return; |
@@ -3775,14 +3783,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
3775 | 3783 | ||
3776 | atomic64_add(nr, &event->count); | 3784 | atomic64_add(nr, &event->count); |
3777 | 3785 | ||
3786 | if (!regs) | ||
3787 | return; | ||
3788 | |||
3778 | if (!hwc->sample_period) | 3789 | if (!hwc->sample_period) |
3779 | return; | 3790 | return; |
3780 | 3791 | ||
3781 | if (!regs) | 3792 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
3793 | return perf_swevent_overflow(event, 1, nmi, data, regs); | ||
3794 | |||
3795 | if (atomic64_add_negative(nr, &hwc->period_left)) | ||
3782 | return; | 3796 | return; |
3783 | 3797 | ||
3784 | if (!atomic64_add_negative(nr, &hwc->period_left)) | 3798 | perf_swevent_overflow(event, 0, nmi, data, regs); |
3785 | perf_swevent_overflow(event, nmi, data, regs); | ||
3786 | } | 3799 | } |
3787 | 3800 | ||
3788 | static int perf_swevent_is_counting(struct perf_event *event) | 3801 | static int perf_swevent_is_counting(struct perf_event *event) |
@@ -3818,6 +3831,20 @@ static int perf_swevent_is_counting(struct perf_event *event) | |||
3818 | static int perf_tp_event_match(struct perf_event *event, | 3831 | static int perf_tp_event_match(struct perf_event *event, |
3819 | struct perf_sample_data *data); | 3832 | struct perf_sample_data *data); |
3820 | 3833 | ||
3834 | static int perf_exclude_event(struct perf_event *event, | ||
3835 | struct pt_regs *regs) | ||
3836 | { | ||
3837 | if (regs) { | ||
3838 | if (event->attr.exclude_user && user_mode(regs)) | ||
3839 | return 1; | ||
3840 | |||
3841 | if (event->attr.exclude_kernel && !user_mode(regs)) | ||
3842 | return 1; | ||
3843 | } | ||
3844 | |||
3845 | return 0; | ||
3846 | } | ||
3847 | |||
3821 | static int perf_swevent_match(struct perf_event *event, | 3848 | static int perf_swevent_match(struct perf_event *event, |
3822 | enum perf_type_id type, | 3849 | enum perf_type_id type, |
3823 | u32 event_id, | 3850 | u32 event_id, |
@@ -3829,16 +3856,12 @@ static int perf_swevent_match(struct perf_event *event, | |||
3829 | 3856 | ||
3830 | if (event->attr.type != type) | 3857 | if (event->attr.type != type) |
3831 | return 0; | 3858 | return 0; |
3859 | |||
3832 | if (event->attr.config != event_id) | 3860 | if (event->attr.config != event_id) |
3833 | return 0; | 3861 | return 0; |
3834 | 3862 | ||
3835 | if (regs) { | 3863 | if (perf_exclude_event(event, regs)) |
3836 | if (event->attr.exclude_user && user_mode(regs)) | 3864 | return 0; |
3837 | return 0; | ||
3838 | |||
3839 | if (event->attr.exclude_kernel && !user_mode(regs)) | ||
3840 | return 0; | ||
3841 | } | ||
3842 | 3865 | ||
3843 | if (event->attr.type == PERF_TYPE_TRACEPOINT && | 3866 | if (event->attr.type == PERF_TYPE_TRACEPOINT && |
3844 | !perf_tp_event_match(event, data)) | 3867 | !perf_tp_event_match(event, data)) |
@@ -3855,49 +3878,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, | |||
3855 | { | 3878 | { |
3856 | struct perf_event *event; | 3879 | struct perf_event *event; |
3857 | 3880 | ||
3858 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3859 | return; | ||
3860 | |||
3861 | rcu_read_lock(); | ||
3862 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3881 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3863 | if (perf_swevent_match(event, type, event_id, data, regs)) | 3882 | if (perf_swevent_match(event, type, event_id, data, regs)) |
3864 | perf_swevent_add(event, nr, nmi, data, regs); | 3883 | perf_swevent_add(event, nr, nmi, data, regs); |
3865 | } | 3884 | } |
3866 | rcu_read_unlock(); | ||
3867 | } | 3885 | } |
3868 | 3886 | ||
3869 | static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) | 3887 | int perf_swevent_get_recursion_context(void) |
3870 | { | 3888 | { |
3889 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | ||
3890 | int rctx; | ||
3891 | |||
3871 | if (in_nmi()) | 3892 | if (in_nmi()) |
3872 | return &cpuctx->recursion[3]; | 3893 | rctx = 3; |
3894 | else if (in_irq()) | ||
3895 | rctx = 2; | ||
3896 | else if (in_softirq()) | ||
3897 | rctx = 1; | ||
3898 | else | ||
3899 | rctx = 0; | ||
3900 | |||
3901 | if (cpuctx->recursion[rctx]) { | ||
3902 | put_cpu_var(perf_cpu_context); | ||
3903 | return -1; | ||
3904 | } | ||
3873 | 3905 | ||
3874 | if (in_irq()) | 3906 | cpuctx->recursion[rctx]++; |
3875 | return &cpuctx->recursion[2]; | 3907 | barrier(); |
3876 | 3908 | ||
3877 | if (in_softirq()) | 3909 | return rctx; |
3878 | return &cpuctx->recursion[1]; | 3910 | } |
3911 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | ||
3879 | 3912 | ||
3880 | return &cpuctx->recursion[0]; | 3913 | void perf_swevent_put_recursion_context(int rctx) |
3914 | { | ||
3915 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
3916 | barrier(); | ||
3917 | cpuctx->recursion[rctx]++; | ||
3918 | put_cpu_var(perf_cpu_context); | ||
3881 | } | 3919 | } |
3920 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); | ||
3882 | 3921 | ||
3883 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 3922 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
3884 | u64 nr, int nmi, | 3923 | u64 nr, int nmi, |
3885 | struct perf_sample_data *data, | 3924 | struct perf_sample_data *data, |
3886 | struct pt_regs *regs) | 3925 | struct pt_regs *regs) |
3887 | { | 3926 | { |
3888 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 3927 | struct perf_cpu_context *cpuctx; |
3889 | int *recursion = perf_swevent_recursion_context(cpuctx); | ||
3890 | struct perf_event_context *ctx; | 3928 | struct perf_event_context *ctx; |
3891 | 3929 | ||
3892 | if (*recursion) | 3930 | cpuctx = &__get_cpu_var(perf_cpu_context); |
3893 | goto out; | 3931 | rcu_read_lock(); |
3894 | |||
3895 | (*recursion)++; | ||
3896 | barrier(); | ||
3897 | |||
3898 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, | 3932 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, |
3899 | nr, nmi, data, regs); | 3933 | nr, nmi, data, regs); |
3900 | rcu_read_lock(); | ||
3901 | /* | 3934 | /* |
3902 | * doesn't really matter which of the child contexts the | 3935 | * doesn't really matter which of the child contexts the |
3903 | * events ends up in. | 3936 | * events ends up in. |
@@ -3906,23 +3939,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
3906 | if (ctx) | 3939 | if (ctx) |
3907 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); | 3940 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); |
3908 | rcu_read_unlock(); | 3941 | rcu_read_unlock(); |
3909 | |||
3910 | barrier(); | ||
3911 | (*recursion)--; | ||
3912 | |||
3913 | out: | ||
3914 | put_cpu_var(perf_cpu_context); | ||
3915 | } | 3942 | } |
3916 | 3943 | ||
3917 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 3944 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
3918 | struct pt_regs *regs, u64 addr) | 3945 | struct pt_regs *regs, u64 addr) |
3919 | { | 3946 | { |
3920 | struct perf_sample_data data = { | 3947 | struct perf_sample_data data; |
3921 | .addr = addr, | 3948 | int rctx; |
3922 | }; | ||
3923 | 3949 | ||
3924 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, | 3950 | rctx = perf_swevent_get_recursion_context(); |
3925 | &data, regs); | 3951 | if (rctx < 0) |
3952 | return; | ||
3953 | |||
3954 | data.addr = addr; | ||
3955 | data.raw = NULL; | ||
3956 | |||
3957 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | ||
3958 | |||
3959 | perf_swevent_put_recursion_context(rctx); | ||
3926 | } | 3960 | } |
3927 | 3961 | ||
3928 | static void perf_swevent_read(struct perf_event *event) | 3962 | static void perf_swevent_read(struct perf_event *event) |
@@ -4145,6 +4179,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | |||
4145 | if (!regs) | 4179 | if (!regs) |
4146 | regs = task_pt_regs(current); | 4180 | regs = task_pt_regs(current); |
4147 | 4181 | ||
4182 | /* Trace events already protected against recursion */ | ||
4148 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, | 4183 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, |
4149 | &data, regs); | 4184 | &data, regs); |
4150 | } | 4185 | } |
@@ -4231,6 +4266,57 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4231 | 4266 | ||
4232 | #endif /* CONFIG_EVENT_PROFILE */ | 4267 | #endif /* CONFIG_EVENT_PROFILE */ |
4233 | 4268 | ||
4269 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
4270 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4271 | { | ||
4272 | release_bp_slot(event); | ||
4273 | } | ||
4274 | |||
4275 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4276 | { | ||
4277 | int err; | ||
4278 | /* | ||
4279 | * The breakpoint is already filled if we haven't created the counter | ||
4280 | * through perf syscall | ||
4281 | * FIXME: manage to get trigerred to NULL if it comes from syscalls | ||
4282 | */ | ||
4283 | if (!bp->callback) | ||
4284 | err = register_perf_hw_breakpoint(bp); | ||
4285 | else | ||
4286 | err = __register_perf_hw_breakpoint(bp); | ||
4287 | if (err) | ||
4288 | return ERR_PTR(err); | ||
4289 | |||
4290 | bp->destroy = bp_perf_event_destroy; | ||
4291 | |||
4292 | return &perf_ops_bp; | ||
4293 | } | ||
4294 | |||
4295 | void perf_bp_event(struct perf_event *bp, void *data) | ||
4296 | { | ||
4297 | struct perf_sample_data sample; | ||
4298 | struct pt_regs *regs = data; | ||
4299 | |||
4300 | sample.addr = bp->attr.bp_addr; | ||
4301 | |||
4302 | if (!perf_exclude_event(bp, regs)) | ||
4303 | perf_swevent_add(bp, 1, 1, &sample, regs); | ||
4304 | } | ||
4305 | #else | ||
4306 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4307 | { | ||
4308 | } | ||
4309 | |||
4310 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4311 | { | ||
4312 | return NULL; | ||
4313 | } | ||
4314 | |||
4315 | void perf_bp_event(struct perf_event *bp, void *regs) | ||
4316 | { | ||
4317 | } | ||
4318 | #endif | ||
4319 | |||
4234 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4320 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
4235 | 4321 | ||
4236 | static void sw_perf_event_destroy(struct perf_event *event) | 4322 | static void sw_perf_event_destroy(struct perf_event *event) |
@@ -4297,6 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4297 | struct perf_event_context *ctx, | 4383 | struct perf_event_context *ctx, |
4298 | struct perf_event *group_leader, | 4384 | struct perf_event *group_leader, |
4299 | struct perf_event *parent_event, | 4385 | struct perf_event *parent_event, |
4386 | perf_callback_t callback, | ||
4300 | gfp_t gfpflags) | 4387 | gfp_t gfpflags) |
4301 | { | 4388 | { |
4302 | const struct pmu *pmu; | 4389 | const struct pmu *pmu; |
@@ -4339,6 +4426,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4339 | 4426 | ||
4340 | event->state = PERF_EVENT_STATE_INACTIVE; | 4427 | event->state = PERF_EVENT_STATE_INACTIVE; |
4341 | 4428 | ||
4429 | if (!callback && parent_event) | ||
4430 | callback = parent_event->callback; | ||
4431 | |||
4432 | event->callback = callback; | ||
4433 | |||
4342 | if (attr->disabled) | 4434 | if (attr->disabled) |
4343 | event->state = PERF_EVENT_STATE_OFF; | 4435 | event->state = PERF_EVENT_STATE_OFF; |
4344 | 4436 | ||
@@ -4373,6 +4465,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4373 | pmu = tp_perf_event_init(event); | 4465 | pmu = tp_perf_event_init(event); |
4374 | break; | 4466 | break; |
4375 | 4467 | ||
4468 | case PERF_TYPE_BREAKPOINT: | ||
4469 | pmu = bp_perf_event_init(event); | ||
4470 | break; | ||
4471 | |||
4472 | |||
4376 | default: | 4473 | default: |
4377 | break; | 4474 | break; |
4378 | } | 4475 | } |
@@ -4615,7 +4712,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
4615 | } | 4712 | } |
4616 | 4713 | ||
4617 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | 4714 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
4618 | NULL, GFP_KERNEL); | 4715 | NULL, NULL, GFP_KERNEL); |
4619 | err = PTR_ERR(event); | 4716 | err = PTR_ERR(event); |
4620 | if (IS_ERR(event)) | 4717 | if (IS_ERR(event)) |
4621 | goto err_put_context; | 4718 | goto err_put_context; |
@@ -4663,6 +4760,58 @@ err_put_context: | |||
4663 | return err; | 4760 | return err; |
4664 | } | 4761 | } |
4665 | 4762 | ||
4763 | /** | ||
4764 | * perf_event_create_kernel_counter | ||
4765 | * | ||
4766 | * @attr: attributes of the counter to create | ||
4767 | * @cpu: cpu in which the counter is bound | ||
4768 | * @pid: task to profile | ||
4769 | */ | ||
4770 | struct perf_event * | ||
4771 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | ||
4772 | pid_t pid, perf_callback_t callback) | ||
4773 | { | ||
4774 | struct perf_event *event; | ||
4775 | struct perf_event_context *ctx; | ||
4776 | int err; | ||
4777 | |||
4778 | /* | ||
4779 | * Get the target context (task or percpu): | ||
4780 | */ | ||
4781 | |||
4782 | ctx = find_get_context(pid, cpu); | ||
4783 | if (IS_ERR(ctx)) | ||
4784 | return NULL; | ||
4785 | |||
4786 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
4787 | NULL, callback, GFP_KERNEL); | ||
4788 | err = PTR_ERR(event); | ||
4789 | if (IS_ERR(event)) | ||
4790 | goto err_put_context; | ||
4791 | |||
4792 | event->filp = NULL; | ||
4793 | WARN_ON_ONCE(ctx->parent_ctx); | ||
4794 | mutex_lock(&ctx->mutex); | ||
4795 | perf_install_in_context(ctx, event, cpu); | ||
4796 | ++ctx->generation; | ||
4797 | mutex_unlock(&ctx->mutex); | ||
4798 | |||
4799 | event->owner = current; | ||
4800 | get_task_struct(current); | ||
4801 | mutex_lock(¤t->perf_event_mutex); | ||
4802 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
4803 | mutex_unlock(¤t->perf_event_mutex); | ||
4804 | |||
4805 | return event; | ||
4806 | |||
4807 | err_put_context: | ||
4808 | if (err < 0) | ||
4809 | put_ctx(ctx); | ||
4810 | |||
4811 | return NULL; | ||
4812 | } | ||
4813 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | ||
4814 | |||
4666 | /* | 4815 | /* |
4667 | * inherit a event from parent task to child task: | 4816 | * inherit a event from parent task to child task: |
4668 | */ | 4817 | */ |
@@ -4688,7 +4837,7 @@ inherit_event(struct perf_event *parent_event, | |||
4688 | child_event = perf_event_alloc(&parent_event->attr, | 4837 | child_event = perf_event_alloc(&parent_event->attr, |
4689 | parent_event->cpu, child_ctx, | 4838 | parent_event->cpu, child_ctx, |
4690 | group_leader, parent_event, | 4839 | group_leader, parent_event, |
4691 | GFP_KERNEL); | 4840 | NULL, GFP_KERNEL); |
4692 | if (IS_ERR(child_event)) | 4841 | if (IS_ERR(child_event)) |
4693 | return child_event; | 4842 | return child_event; |
4694 | get_ctx(child_ctx); | 4843 | get_ctx(child_ctx); |
@@ -4706,6 +4855,8 @@ inherit_event(struct perf_event *parent_event, | |||
4706 | if (parent_event->attr.freq) | 4855 | if (parent_event->attr.freq) |
4707 | child_event->hw.sample_period = parent_event->hw.sample_period; | 4856 | child_event->hw.sample_period = parent_event->hw.sample_period; |
4708 | 4857 | ||
4858 | child_event->overflow_handler = parent_event->overflow_handler; | ||
4859 | |||
4709 | /* | 4860 | /* |
4710 | * Link it up in the child's context: | 4861 | * Link it up in the child's context: |
4711 | */ | 4862 | */ |
@@ -4795,7 +4946,6 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
4795 | { | 4946 | { |
4796 | struct perf_event *parent_event; | 4947 | struct perf_event *parent_event; |
4797 | 4948 | ||
4798 | update_event_times(child_event); | ||
4799 | perf_event_remove_from_context(child_event); | 4949 | perf_event_remove_from_context(child_event); |
4800 | 4950 | ||
4801 | parent_event = child_event->parent; | 4951 | parent_event = child_event->parent; |
@@ -4847,6 +4997,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
4847 | * the events from it. | 4997 | * the events from it. |
4848 | */ | 4998 | */ |
4849 | unclone_ctx(child_ctx); | 4999 | unclone_ctx(child_ctx); |
5000 | update_context_time(child_ctx); | ||
4850 | spin_unlock_irqrestore(&child_ctx->lock, flags); | 5001 | spin_unlock_irqrestore(&child_ctx->lock, flags); |
4851 | 5002 | ||
4852 | /* | 5003 | /* |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f05671609a89..d006554888dc 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -339,6 +339,27 @@ config POWER_TRACER | |||
339 | power management decisions, specifically the C-state and P-state | 339 | power management decisions, specifically the C-state and P-state |
340 | behavior. | 340 | behavior. |
341 | 341 | ||
342 | config KSYM_TRACER | ||
343 | bool "Trace read and write access on kernel memory locations" | ||
344 | depends on HAVE_HW_BREAKPOINT | ||
345 | select TRACING | ||
346 | help | ||
347 | This tracer helps find read and write operations on any given kernel | ||
348 | symbol i.e. /proc/kallsyms. | ||
349 | |||
350 | config PROFILE_KSYM_TRACER | ||
351 | bool "Profile all kernel memory accesses on 'watched' variables" | ||
352 | depends on KSYM_TRACER | ||
353 | help | ||
354 | This tracer profiles kernel accesses on variables watched through the | ||
355 | ksym tracer ftrace plugin. Depending upon the hardware, all read | ||
356 | and write operations on kernel variables can be monitored for | ||
357 | accesses. | ||
358 | |||
359 | The results will be displayed in: | ||
360 | /debugfs/tracing/profile_ksym | ||
361 | |||
362 | Say N if unsure. | ||
342 | 363 | ||
343 | config STACK_TRACER | 364 | config STACK_TRACER |
344 | bool "Trace max stack" | 365 | bool "Trace max stack" |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index edc3a3cca1a1..cd9ecd89ec77 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | |||
54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o |
55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
57 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o | ||
57 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o | 58 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o |
58 | 59 | ||
59 | libftrace-y := ftrace.o | 60 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b4e4212e66d7..4da6ede74401 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <trace/boot.h> | 12 | #include <trace/boot.h> |
13 | #include <linux/kmemtrace.h> | 13 | #include <linux/kmemtrace.h> |
14 | #include <linux/hw_breakpoint.h> | ||
14 | 15 | ||
15 | #include <linux/trace_seq.h> | 16 | #include <linux/trace_seq.h> |
16 | #include <linux/ftrace_event.h> | 17 | #include <linux/ftrace_event.h> |
@@ -37,6 +38,7 @@ enum trace_type { | |||
37 | TRACE_KMEM_ALLOC, | 38 | TRACE_KMEM_ALLOC, |
38 | TRACE_KMEM_FREE, | 39 | TRACE_KMEM_FREE, |
39 | TRACE_BLK, | 40 | TRACE_BLK, |
41 | TRACE_KSYM, | ||
40 | 42 | ||
41 | __TRACE_LAST_TYPE, | 43 | __TRACE_LAST_TYPE, |
42 | }; | 44 | }; |
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void); | |||
232 | TRACE_KMEM_ALLOC); \ | 234 | TRACE_KMEM_ALLOC); \ |
233 | IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ | 235 | IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ |
234 | TRACE_KMEM_FREE); \ | 236 | TRACE_KMEM_FREE); \ |
237 | IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ | ||
235 | __ftrace_bad_type(); \ | 238 | __ftrace_bad_type(); \ |
236 | } while (0) | 239 | } while (0) |
237 | 240 | ||
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type); | |||
387 | void unregister_tracer(struct tracer *type); | 390 | void unregister_tracer(struct tracer *type); |
388 | int is_tracing_stopped(void); | 391 | int is_tracing_stopped(void); |
389 | 392 | ||
393 | extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); | ||
394 | |||
390 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); | 395 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); |
391 | 396 | ||
392 | #ifdef CONFIG_TRACER_MAX_TRACE | 397 | #ifdef CONFIG_TRACER_MAX_TRACE |
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, | |||
461 | struct trace_array *tr); | 466 | struct trace_array *tr); |
462 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, | 467 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, |
463 | struct trace_array *tr); | 468 | struct trace_array *tr); |
469 | extern int trace_selftest_startup_ksym(struct tracer *trace, | ||
470 | struct trace_array *tr); | ||
464 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 471 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
465 | 472 | ||
466 | extern void *head_page(struct trace_array_cpu *data); | 473 | extern void *head_page(struct trace_array_cpu *data); |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index ead3d724599d..c16a08f399df 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, | |||
364 | F_printk("type:%u call_site:%lx ptr:%p", | 364 | F_printk("type:%u call_site:%lx ptr:%p", |
365 | __entry->type_id, __entry->call_site, __entry->ptr) | 365 | __entry->type_id, __entry->call_site, __entry->ptr) |
366 | ); | 366 | ); |
367 | |||
368 | FTRACE_ENTRY(ksym_trace, ksym_trace_entry, | ||
369 | |||
370 | TRACE_KSYM, | ||
371 | |||
372 | F_STRUCT( | ||
373 | __field( unsigned long, ip ) | ||
374 | __field( unsigned char, type ) | ||
375 | __array( char , cmd, TASK_COMM_LEN ) | ||
376 | __field( unsigned long, addr ) | ||
377 | ), | ||
378 | |||
379 | F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", | ||
380 | (void *)__entry->ip, (unsigned int)__entry->type, | ||
381 | (void *)__entry->addr, __entry->cmd) | ||
382 | ); | ||
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index e0d351b01f5a..d9c60f80aa0d 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
@@ -9,31 +9,33 @@ | |||
9 | #include "trace.h" | 9 | #include "trace.h" |
10 | 10 | ||
11 | 11 | ||
12 | struct perf_trace_buf *perf_trace_buf; | 12 | char *perf_trace_buf; |
13 | EXPORT_SYMBOL_GPL(perf_trace_buf); | 13 | EXPORT_SYMBOL_GPL(perf_trace_buf); |
14 | 14 | ||
15 | struct perf_trace_buf *perf_trace_buf_nmi; | 15 | char *perf_trace_buf_nmi; |
16 | EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); | 16 | EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); |
17 | 17 | ||
18 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; | ||
19 | |||
18 | /* Count the events in use (per event id, not per instance) */ | 20 | /* Count the events in use (per event id, not per instance) */ |
19 | static int total_profile_count; | 21 | static int total_profile_count; |
20 | 22 | ||
21 | static int ftrace_profile_enable_event(struct ftrace_event_call *event) | 23 | static int ftrace_profile_enable_event(struct ftrace_event_call *event) |
22 | { | 24 | { |
23 | struct perf_trace_buf *buf; | 25 | char *buf; |
24 | int ret = -ENOMEM; | 26 | int ret = -ENOMEM; |
25 | 27 | ||
26 | if (atomic_inc_return(&event->profile_count)) | 28 | if (atomic_inc_return(&event->profile_count)) |
27 | return 0; | 29 | return 0; |
28 | 30 | ||
29 | if (!total_profile_count) { | 31 | if (!total_profile_count) { |
30 | buf = alloc_percpu(struct perf_trace_buf); | 32 | buf = (char *)alloc_percpu(perf_trace_t); |
31 | if (!buf) | 33 | if (!buf) |
32 | goto fail_buf; | 34 | goto fail_buf; |
33 | 35 | ||
34 | rcu_assign_pointer(perf_trace_buf, buf); | 36 | rcu_assign_pointer(perf_trace_buf, buf); |
35 | 37 | ||
36 | buf = alloc_percpu(struct perf_trace_buf); | 38 | buf = (char *)alloc_percpu(perf_trace_t); |
37 | if (!buf) | 39 | if (!buf) |
38 | goto fail_buf_nmi; | 40 | goto fail_buf_nmi; |
39 | 41 | ||
@@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id) | |||
79 | 81 | ||
80 | static void ftrace_profile_disable_event(struct ftrace_event_call *event) | 82 | static void ftrace_profile_disable_event(struct ftrace_event_call *event) |
81 | { | 83 | { |
82 | struct perf_trace_buf *buf, *nmi_buf; | 84 | char *buf, *nmi_buf; |
83 | 85 | ||
84 | if (!atomic_add_negative(-1, &event->profile_count)) | 86 | if (!atomic_add_negative(-1, &event->profile_count)) |
85 | return; | 87 | return; |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3696476f307d..79ce6a2bd74f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, | |||
1208 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1208 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1209 | struct ftrace_event_call *call = &tp->call; | 1209 | struct ftrace_event_call *call = &tp->call; |
1210 | struct kprobe_trace_entry *entry; | 1210 | struct kprobe_trace_entry *entry; |
1211 | struct perf_trace_buf *trace_buf; | ||
1212 | struct trace_entry *ent; | 1211 | struct trace_entry *ent; |
1213 | int size, __size, i, pc, __cpu; | 1212 | int size, __size, i, pc, __cpu; |
1214 | unsigned long irq_flags; | 1213 | unsigned long irq_flags; |
1214 | char *trace_buf; | ||
1215 | char *raw_data; | 1215 | char *raw_data; |
1216 | int rctx; | ||
1216 | 1217 | ||
1217 | pc = preempt_count(); | 1218 | pc = preempt_count(); |
1218 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); | 1219 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); |
@@ -1227,6 +1228,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, | |||
1227 | * This also protects the rcu read side | 1228 | * This also protects the rcu read side |
1228 | */ | 1229 | */ |
1229 | local_irq_save(irq_flags); | 1230 | local_irq_save(irq_flags); |
1231 | |||
1232 | rctx = perf_swevent_get_recursion_context(); | ||
1233 | if (rctx < 0) | ||
1234 | goto end_recursion; | ||
1235 | |||
1230 | __cpu = smp_processor_id(); | 1236 | __cpu = smp_processor_id(); |
1231 | 1237 | ||
1232 | if (in_nmi()) | 1238 | if (in_nmi()) |
@@ -1237,18 +1243,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, | |||
1237 | if (!trace_buf) | 1243 | if (!trace_buf) |
1238 | goto end; | 1244 | goto end; |
1239 | 1245 | ||
1240 | trace_buf = per_cpu_ptr(trace_buf, __cpu); | 1246 | raw_data = per_cpu_ptr(trace_buf, __cpu); |
1241 | |||
1242 | if (trace_buf->recursion++) | ||
1243 | goto end_recursion; | ||
1244 | |||
1245 | /* | ||
1246 | * Make recursion update visible before entering perf_tp_event | ||
1247 | * so that we protect from perf recursions. | ||
1248 | */ | ||
1249 | barrier(); | ||
1250 | |||
1251 | raw_data = trace_buf->buf; | ||
1252 | 1247 | ||
1253 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | 1248 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ |
1254 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 1249 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
@@ -1263,9 +1258,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, | |||
1263 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1258 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1264 | perf_tp_event(call->id, entry->ip, 1, entry, size); | 1259 | perf_tp_event(call->id, entry->ip, 1, entry, size); |
1265 | 1260 | ||
1266 | end_recursion: | ||
1267 | trace_buf->recursion--; | ||
1268 | end: | 1261 | end: |
1262 | perf_swevent_put_recursion_context(rctx); | ||
1263 | end_recursion: | ||
1269 | local_irq_restore(irq_flags); | 1264 | local_irq_restore(irq_flags); |
1270 | 1265 | ||
1271 | return 0; | 1266 | return 0; |
@@ -1278,11 +1273,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | |||
1278 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1273 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1279 | struct ftrace_event_call *call = &tp->call; | 1274 | struct ftrace_event_call *call = &tp->call; |
1280 | struct kretprobe_trace_entry *entry; | 1275 | struct kretprobe_trace_entry *entry; |
1281 | struct perf_trace_buf *trace_buf; | ||
1282 | struct trace_entry *ent; | 1276 | struct trace_entry *ent; |
1283 | int size, __size, i, pc, __cpu; | 1277 | int size, __size, i, pc, __cpu; |
1284 | unsigned long irq_flags; | 1278 | unsigned long irq_flags; |
1279 | char *trace_buf; | ||
1285 | char *raw_data; | 1280 | char *raw_data; |
1281 | int rctx; | ||
1286 | 1282 | ||
1287 | pc = preempt_count(); | 1283 | pc = preempt_count(); |
1288 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); | 1284 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); |
@@ -1297,6 +1293,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | |||
1297 | * This also protects the rcu read side | 1293 | * This also protects the rcu read side |
1298 | */ | 1294 | */ |
1299 | local_irq_save(irq_flags); | 1295 | local_irq_save(irq_flags); |
1296 | |||
1297 | rctx = perf_swevent_get_recursion_context(); | ||
1298 | if (rctx < 0) | ||
1299 | goto end_recursion; | ||
1300 | |||
1300 | __cpu = smp_processor_id(); | 1301 | __cpu = smp_processor_id(); |
1301 | 1302 | ||
1302 | if (in_nmi()) | 1303 | if (in_nmi()) |
@@ -1307,18 +1308,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | |||
1307 | if (!trace_buf) | 1308 | if (!trace_buf) |
1308 | goto end; | 1309 | goto end; |
1309 | 1310 | ||
1310 | trace_buf = per_cpu_ptr(trace_buf, __cpu); | 1311 | raw_data = per_cpu_ptr(trace_buf, __cpu); |
1311 | |||
1312 | if (trace_buf->recursion++) | ||
1313 | goto end_recursion; | ||
1314 | |||
1315 | /* | ||
1316 | * Make recursion update visible before entering perf_tp_event | ||
1317 | * so that we protect from perf recursions. | ||
1318 | */ | ||
1319 | barrier(); | ||
1320 | |||
1321 | raw_data = trace_buf->buf; | ||
1322 | 1312 | ||
1323 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | 1313 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ |
1324 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 1314 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
@@ -1334,9 +1324,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | |||
1334 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1324 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1335 | perf_tp_event(call->id, entry->ret_ip, 1, entry, size); | 1325 | perf_tp_event(call->id, entry->ret_ip, 1, entry, size); |
1336 | 1326 | ||
1337 | end_recursion: | ||
1338 | trace_buf->recursion--; | ||
1339 | end: | 1327 | end: |
1328 | perf_swevent_put_recursion_context(rctx); | ||
1329 | end_recursion: | ||
1340 | local_irq_restore(irq_flags); | 1330 | local_irq_restore(irq_flags); |
1341 | 1331 | ||
1342 | return 0; | 1332 | return 0; |
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c new file mode 100644 index 000000000000..11935b53a6cb --- /dev/null +++ b/kernel/trace/trace_ksym.c | |||
@@ -0,0 +1,554 @@ | |||
1 | /* | ||
2 | * trace_ksym.c - Kernel Symbol Tracer | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/kallsyms.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/debugfs.h> | ||
24 | #include <linux/ftrace.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #include "trace_output.h" | ||
29 | #include "trace_stat.h" | ||
30 | #include "trace.h" | ||
31 | |||
32 | #include <linux/hw_breakpoint.h> | ||
33 | #include <asm/hw_breakpoint.h> | ||
34 | |||
35 | /* | ||
36 | * For now, let us restrict the no. of symbols traced simultaneously to number | ||
37 | * of available hardware breakpoint registers. | ||
38 | */ | ||
39 | #define KSYM_TRACER_MAX HBP_NUM | ||
40 | |||
41 | #define KSYM_TRACER_OP_LEN 3 /* rw- */ | ||
42 | |||
43 | struct trace_ksym { | ||
44 | struct perf_event **ksym_hbp; | ||
45 | unsigned long ksym_addr; | ||
46 | int type; | ||
47 | int len; | ||
48 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
49 | unsigned long counter; | ||
50 | #endif | ||
51 | struct hlist_node ksym_hlist; | ||
52 | }; | ||
53 | |||
54 | static struct trace_array *ksym_trace_array; | ||
55 | |||
56 | static unsigned int ksym_filter_entry_count; | ||
57 | static unsigned int ksym_tracing_enabled; | ||
58 | |||
59 | static HLIST_HEAD(ksym_filter_head); | ||
60 | |||
61 | static DEFINE_MUTEX(ksym_tracer_mutex); | ||
62 | |||
63 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
64 | |||
65 | #define MAX_UL_INT 0xffffffff | ||
66 | |||
67 | void ksym_collect_stats(unsigned long hbp_hit_addr) | ||
68 | { | ||
69 | struct hlist_node *node; | ||
70 | struct trace_ksym *entry; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { | ||
74 | if ((entry->ksym_addr == hbp_hit_addr) && | ||
75 | (entry->counter <= MAX_UL_INT)) { | ||
76 | entry->counter++; | ||
77 | break; | ||
78 | } | ||
79 | } | ||
80 | rcu_read_unlock(); | ||
81 | } | ||
82 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
83 | |||
84 | void ksym_hbp_handler(struct perf_event *hbp, void *data) | ||
85 | { | ||
86 | struct ring_buffer_event *event; | ||
87 | struct ksym_trace_entry *entry; | ||
88 | struct pt_regs *regs = data; | ||
89 | struct ring_buffer *buffer; | ||
90 | int pc; | ||
91 | |||
92 | if (!ksym_tracing_enabled) | ||
93 | return; | ||
94 | |||
95 | buffer = ksym_trace_array->buffer; | ||
96 | |||
97 | pc = preempt_count(); | ||
98 | |||
99 | event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, | ||
100 | sizeof(*entry), 0, pc); | ||
101 | if (!event) | ||
102 | return; | ||
103 | |||
104 | entry = ring_buffer_event_data(event); | ||
105 | entry->ip = instruction_pointer(regs); | ||
106 | entry->type = hw_breakpoint_type(hbp); | ||
107 | entry->addr = hw_breakpoint_addr(hbp); | ||
108 | strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); | ||
109 | |||
110 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
111 | ksym_collect_stats(hw_breakpoint_addr(hbp)); | ||
112 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
113 | |||
114 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
115 | } | ||
116 | |||
117 | /* Valid access types are represented as | ||
118 | * | ||
119 | * rw- : Set Read/Write Access Breakpoint | ||
120 | * -w- : Set Write Access Breakpoint | ||
121 | * --- : Clear Breakpoints | ||
122 | * --x : Set Execution Break points (Not available yet) | ||
123 | * | ||
124 | */ | ||
125 | static int ksym_trace_get_access_type(char *str) | ||
126 | { | ||
127 | int access = 0; | ||
128 | |||
129 | if (str[0] == 'r') | ||
130 | access |= HW_BREAKPOINT_R; | ||
131 | |||
132 | if (str[1] == 'w') | ||
133 | access |= HW_BREAKPOINT_W; | ||
134 | |||
135 | if (str[2] == 'x') | ||
136 | access |= HW_BREAKPOINT_X; | ||
137 | |||
138 | switch (access) { | ||
139 | case HW_BREAKPOINT_R: | ||
140 | case HW_BREAKPOINT_W: | ||
141 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
142 | return access; | ||
143 | default: | ||
144 | return -EINVAL; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * There can be several possible malformed requests and we attempt to capture | ||
150 | * all of them. We enumerate some of the rules | ||
151 | * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. | ||
152 | * i.e. multiple ':' symbols disallowed. Possible uses are of the form | ||
153 | * <module>:<ksym_name>:<op>. | ||
154 | * 2. No delimiter symbol ':' in the input string | ||
155 | * 3. Spurious operator symbols or symbols not in their respective positions | ||
156 | * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file | ||
157 | * 5. Kernel symbol not a part of /proc/kallsyms | ||
158 | * 6. Duplicate requests | ||
159 | */ | ||
160 | static int parse_ksym_trace_str(char *input_string, char **ksymname, | ||
161 | unsigned long *addr) | ||
162 | { | ||
163 | int ret; | ||
164 | |||
165 | *ksymname = strsep(&input_string, ":"); | ||
166 | *addr = kallsyms_lookup_name(*ksymname); | ||
167 | |||
168 | /* Check for malformed request: (2), (1) and (5) */ | ||
169 | if ((!input_string) || | ||
170 | (strlen(input_string) != KSYM_TRACER_OP_LEN) || | ||
171 | (*addr == 0)) | ||
172 | return -EINVAL;; | ||
173 | |||
174 | ret = ksym_trace_get_access_type(input_string); | ||
175 | |||
176 | return ret; | ||
177 | } | ||
178 | |||
179 | int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) | ||
180 | { | ||
181 | struct trace_ksym *entry; | ||
182 | int ret = -ENOMEM; | ||
183 | |||
184 | if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { | ||
185 | printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" | ||
186 | " new requests for tracing can be accepted now.\n", | ||
187 | KSYM_TRACER_MAX); | ||
188 | return -ENOSPC; | ||
189 | } | ||
190 | |||
191 | entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); | ||
192 | if (!entry) | ||
193 | return -ENOMEM; | ||
194 | |||
195 | entry->type = op; | ||
196 | entry->ksym_addr = addr; | ||
197 | entry->len = HW_BREAKPOINT_LEN_4; | ||
198 | |||
199 | ret = -EAGAIN; | ||
200 | entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, | ||
201 | entry->len, entry->type, | ||
202 | ksym_hbp_handler, true); | ||
203 | if (IS_ERR(entry->ksym_hbp)) { | ||
204 | entry->ksym_hbp = NULL; | ||
205 | ret = PTR_ERR(entry->ksym_hbp); | ||
206 | } | ||
207 | |||
208 | if (!entry->ksym_hbp) { | ||
209 | printk(KERN_INFO "ksym_tracer request failed. Try again" | ||
210 | " later!!\n"); | ||
211 | goto err; | ||
212 | } | ||
213 | |||
214 | hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); | ||
215 | ksym_filter_entry_count++; | ||
216 | |||
217 | return 0; | ||
218 | |||
219 | err: | ||
220 | kfree(entry); | ||
221 | |||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, | ||
226 | size_t count, loff_t *ppos) | ||
227 | { | ||
228 | struct trace_ksym *entry; | ||
229 | struct hlist_node *node; | ||
230 | struct trace_seq *s; | ||
231 | ssize_t cnt = 0; | ||
232 | int ret; | ||
233 | |||
234 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
235 | if (!s) | ||
236 | return -ENOMEM; | ||
237 | trace_seq_init(s); | ||
238 | |||
239 | mutex_lock(&ksym_tracer_mutex); | ||
240 | |||
241 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | ||
242 | ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); | ||
243 | if (entry->type == HW_BREAKPOINT_R) | ||
244 | ret = trace_seq_puts(s, "r--\n"); | ||
245 | else if (entry->type == HW_BREAKPOINT_W) | ||
246 | ret = trace_seq_puts(s, "-w-\n"); | ||
247 | else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) | ||
248 | ret = trace_seq_puts(s, "rw-\n"); | ||
249 | WARN_ON_ONCE(!ret); | ||
250 | } | ||
251 | |||
252 | cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | ||
253 | |||
254 | mutex_unlock(&ksym_tracer_mutex); | ||
255 | |||
256 | kfree(s); | ||
257 | |||
258 | return cnt; | ||
259 | } | ||
260 | |||
261 | static void __ksym_trace_reset(void) | ||
262 | { | ||
263 | struct trace_ksym *entry; | ||
264 | struct hlist_node *node, *node1; | ||
265 | |||
266 | mutex_lock(&ksym_tracer_mutex); | ||
267 | hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, | ||
268 | ksym_hlist) { | ||
269 | unregister_wide_hw_breakpoint(entry->ksym_hbp); | ||
270 | ksym_filter_entry_count--; | ||
271 | hlist_del_rcu(&(entry->ksym_hlist)); | ||
272 | synchronize_rcu(); | ||
273 | kfree(entry); | ||
274 | } | ||
275 | mutex_unlock(&ksym_tracer_mutex); | ||
276 | } | ||
277 | |||
278 | static ssize_t ksym_trace_filter_write(struct file *file, | ||
279 | const char __user *buffer, | ||
280 | size_t count, loff_t *ppos) | ||
281 | { | ||
282 | struct trace_ksym *entry; | ||
283 | struct hlist_node *node; | ||
284 | char *input_string, *ksymname = NULL; | ||
285 | unsigned long ksym_addr = 0; | ||
286 | int ret, op, changed = 0; | ||
287 | |||
288 | input_string = kzalloc(count + 1, GFP_KERNEL); | ||
289 | if (!input_string) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | if (copy_from_user(input_string, buffer, count)) { | ||
293 | kfree(input_string); | ||
294 | return -EFAULT; | ||
295 | } | ||
296 | input_string[count] = '\0'; | ||
297 | |||
298 | strstrip(input_string); | ||
299 | |||
300 | /* | ||
301 | * Clear all breakpoints if: | ||
302 | * 1: echo > ksym_trace_filter | ||
303 | * 2: echo 0 > ksym_trace_filter | ||
304 | * 3: echo "*:---" > ksym_trace_filter | ||
305 | */ | ||
306 | if (!input_string[0] || !strcmp(input_string, "0") || | ||
307 | !strcmp(input_string, "*:---")) { | ||
308 | __ksym_trace_reset(); | ||
309 | kfree(input_string); | ||
310 | return count; | ||
311 | } | ||
312 | |||
313 | ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); | ||
314 | if (ret < 0) { | ||
315 | kfree(input_string); | ||
316 | return ret; | ||
317 | } | ||
318 | |||
319 | mutex_lock(&ksym_tracer_mutex); | ||
320 | |||
321 | ret = -EINVAL; | ||
322 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | ||
323 | if (entry->ksym_addr == ksym_addr) { | ||
324 | /* Check for malformed request: (6) */ | ||
325 | if (entry->type != op) | ||
326 | changed = 1; | ||
327 | else | ||
328 | goto out; | ||
329 | break; | ||
330 | } | ||
331 | } | ||
332 | if (changed) { | ||
333 | unregister_wide_hw_breakpoint(entry->ksym_hbp); | ||
334 | entry->type = op; | ||
335 | if (op > 0) { | ||
336 | entry->ksym_hbp = | ||
337 | register_wide_hw_breakpoint(entry->ksym_addr, | ||
338 | entry->len, entry->type, | ||
339 | ksym_hbp_handler, true); | ||
340 | if (IS_ERR(entry->ksym_hbp)) | ||
341 | entry->ksym_hbp = NULL; | ||
342 | if (!entry->ksym_hbp) | ||
343 | goto out; | ||
344 | } | ||
345 | ksym_filter_entry_count--; | ||
346 | hlist_del_rcu(&(entry->ksym_hlist)); | ||
347 | synchronize_rcu(); | ||
348 | kfree(entry); | ||
349 | ret = 0; | ||
350 | goto out; | ||
351 | } else { | ||
352 | /* Check for malformed request: (4) */ | ||
353 | if (op == 0) | ||
354 | goto out; | ||
355 | ret = process_new_ksym_entry(ksymname, op, ksym_addr); | ||
356 | } | ||
357 | out: | ||
358 | mutex_unlock(&ksym_tracer_mutex); | ||
359 | |||
360 | kfree(input_string); | ||
361 | |||
362 | if (!ret) | ||
363 | ret = count; | ||
364 | return ret; | ||
365 | } | ||
366 | |||
367 | static const struct file_operations ksym_tracing_fops = { | ||
368 | .open = tracing_open_generic, | ||
369 | .read = ksym_trace_filter_read, | ||
370 | .write = ksym_trace_filter_write, | ||
371 | }; | ||
372 | |||
373 | static void ksym_trace_reset(struct trace_array *tr) | ||
374 | { | ||
375 | ksym_tracing_enabled = 0; | ||
376 | __ksym_trace_reset(); | ||
377 | } | ||
378 | |||
379 | static int ksym_trace_init(struct trace_array *tr) | ||
380 | { | ||
381 | int cpu, ret = 0; | ||
382 | |||
383 | for_each_online_cpu(cpu) | ||
384 | tracing_reset(tr, cpu); | ||
385 | ksym_tracing_enabled = 1; | ||
386 | ksym_trace_array = tr; | ||
387 | |||
388 | return ret; | ||
389 | } | ||
390 | |||
/* Column headers for the human-readable ksym trace output. */
static void ksym_trace_print_header(struct seq_file *m)
{
	seq_puts(m,
		 "#       TASK-PID   CPU#      Symbol                    "
		 "Type    Function\n");
	seq_puts(m,
		 "#          |        |          |                       "
		 "|         |\n");
}
400 | |||
401 | static enum print_line_t ksym_trace_output(struct trace_iterator *iter) | ||
402 | { | ||
403 | struct trace_entry *entry = iter->ent; | ||
404 | struct trace_seq *s = &iter->seq; | ||
405 | struct ksym_trace_entry *field; | ||
406 | char str[KSYM_SYMBOL_LEN]; | ||
407 | int ret; | ||
408 | |||
409 | if (entry->type != TRACE_KSYM) | ||
410 | return TRACE_TYPE_UNHANDLED; | ||
411 | |||
412 | trace_assign_type(field, entry); | ||
413 | |||
414 | ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, | ||
415 | entry->pid, iter->cpu, (char *)field->addr); | ||
416 | if (!ret) | ||
417 | return TRACE_TYPE_PARTIAL_LINE; | ||
418 | |||
419 | switch (field->type) { | ||
420 | case HW_BREAKPOINT_R: | ||
421 | ret = trace_seq_printf(s, " R "); | ||
422 | break; | ||
423 | case HW_BREAKPOINT_W: | ||
424 | ret = trace_seq_printf(s, " W "); | ||
425 | break; | ||
426 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
427 | ret = trace_seq_printf(s, " RW "); | ||
428 | break; | ||
429 | default: | ||
430 | return TRACE_TYPE_PARTIAL_LINE; | ||
431 | } | ||
432 | |||
433 | if (!ret) | ||
434 | return TRACE_TYPE_PARTIAL_LINE; | ||
435 | |||
436 | sprint_symbol(str, field->ip); | ||
437 | ret = trace_seq_printf(s, "%s\n", str); | ||
438 | if (!ret) | ||
439 | return TRACE_TYPE_PARTIAL_LINE; | ||
440 | |||
441 | return TRACE_TYPE_HANDLED; | ||
442 | } | ||
443 | |||
444 | struct tracer ksym_tracer __read_mostly = | ||
445 | { | ||
446 | .name = "ksym_tracer", | ||
447 | .init = ksym_trace_init, | ||
448 | .reset = ksym_trace_reset, | ||
449 | #ifdef CONFIG_FTRACE_SELFTEST | ||
450 | .selftest = trace_selftest_startup_ksym, | ||
451 | #endif | ||
452 | .print_header = ksym_trace_print_header, | ||
453 | .print_line = ksym_trace_output | ||
454 | }; | ||
455 | |||
456 | __init static int init_ksym_trace(void) | ||
457 | { | ||
458 | struct dentry *d_tracer; | ||
459 | struct dentry *entry; | ||
460 | |||
461 | d_tracer = tracing_init_dentry(); | ||
462 | ksym_filter_entry_count = 0; | ||
463 | |||
464 | entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, | ||
465 | NULL, &ksym_tracing_fops); | ||
466 | if (!entry) | ||
467 | pr_warning("Could not create debugfs " | ||
468 | "'ksym_trace_filter' file\n"); | ||
469 | |||
470 | return register_tracer(&ksym_tracer); | ||
471 | } | ||
472 | device_initcall(init_ksym_trace); | ||
473 | |||
474 | |||
475 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
/* Header row for the ksym_tracer stat file. */
static int ksym_tracer_stat_headers(struct seq_file *m)
{
	seq_puts(m, "  Access Type ");
	seq_puts(m, "  Symbol                                       Counter\n");
	seq_puts(m, "  ----------- ");
	seq_puts(m, "  ------                                       -------\n");
	return 0;
}
484 | |||
485 | static int ksym_tracer_stat_show(struct seq_file *m, void *v) | ||
486 | { | ||
487 | struct hlist_node *stat = v; | ||
488 | struct trace_ksym *entry; | ||
489 | int access_type = 0; | ||
490 | char fn_name[KSYM_NAME_LEN]; | ||
491 | |||
492 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); | ||
493 | |||
494 | access_type = entry->type; | ||
495 | |||
496 | switch (access_type) { | ||
497 | case HW_BREAKPOINT_R: | ||
498 | seq_puts(m, " R "); | ||
499 | break; | ||
500 | case HW_BREAKPOINT_W: | ||
501 | seq_puts(m, " W "); | ||
502 | break; | ||
503 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
504 | seq_puts(m, " RW "); | ||
505 | break; | ||
506 | default: | ||
507 | seq_puts(m, " NA "); | ||
508 | } | ||
509 | |||
510 | if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) | ||
511 | seq_printf(m, " %-36s", fn_name); | ||
512 | else | ||
513 | seq_printf(m, " %-36s", "<NA>"); | ||
514 | seq_printf(m, " %15lu\n", entry->counter); | ||
515 | |||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static void *ksym_tracer_stat_start(struct tracer_stat *trace) | ||
520 | { | ||
521 | return ksym_filter_head.first; | ||
522 | } | ||
523 | |||
524 | static void * | ||
525 | ksym_tracer_stat_next(void *v, int idx) | ||
526 | { | ||
527 | struct hlist_node *stat = v; | ||
528 | |||
529 | return stat->next; | ||
530 | } | ||
531 | |||
532 | static struct tracer_stat ksym_tracer_stats = { | ||
533 | .name = "ksym_tracer", | ||
534 | .stat_start = ksym_tracer_stat_start, | ||
535 | .stat_next = ksym_tracer_stat_next, | ||
536 | .stat_headers = ksym_tracer_stat_headers, | ||
537 | .stat_show = ksym_tracer_stat_show | ||
538 | }; | ||
539 | |||
540 | __init static int ksym_tracer_stat_init(void) | ||
541 | { | ||
542 | int ret; | ||
543 | |||
544 | ret = register_stat_tracer(&ksym_tracer_stats); | ||
545 | if (ret) { | ||
546 | printk(KERN_WARNING "Warning: could not register " | ||
547 | "ksym tracer stats\n"); | ||
548 | return 1; | ||
549 | } | ||
550 | |||
551 | return 0; | ||
552 | } | ||
553 | fs_initcall(ksym_tracer_stat_init); | ||
554 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index d2cdbabb4ead..dc98309e839a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) | |||
17 | case TRACE_GRAPH_ENT: | 17 | case TRACE_GRAPH_ENT: |
18 | case TRACE_GRAPH_RET: | 18 | case TRACE_GRAPH_RET: |
19 | case TRACE_HW_BRANCHES: | 19 | case TRACE_HW_BRANCHES: |
20 | case TRACE_KSYM: | ||
20 | return 1; | 21 | return 1; |
21 | } | 22 | } |
22 | return 0; | 23 | return 0; |
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace, | |||
808 | return ret; | 809 | return ret; |
809 | } | 810 | } |
810 | #endif /* CONFIG_HW_BRANCH_TRACER */ | 811 | #endif /* CONFIG_HW_BRANCH_TRACER */ |
812 | |||
813 | #ifdef CONFIG_KSYM_TRACER | ||
814 | static int ksym_selftest_dummy; | ||
815 | |||
816 | int | ||
817 | trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) | ||
818 | { | ||
819 | unsigned long count; | ||
820 | int ret; | ||
821 | |||
822 | /* start the tracing */ | ||
823 | ret = tracer_init(trace, tr); | ||
824 | if (ret) { | ||
825 | warn_failed_init_tracer(trace, ret); | ||
826 | return ret; | ||
827 | } | ||
828 | |||
829 | ksym_selftest_dummy = 0; | ||
830 | /* Register the read-write tracing request */ | ||
831 | |||
832 | ret = process_new_ksym_entry("ksym_selftest_dummy", | ||
833 | HW_BREAKPOINT_R | HW_BREAKPOINT_W, | ||
834 | (unsigned long)(&ksym_selftest_dummy)); | ||
835 | |||
836 | if (ret < 0) { | ||
837 | printk(KERN_CONT "ksym_trace read-write startup test failed\n"); | ||
838 | goto ret_path; | ||
839 | } | ||
840 | /* Perform a read and a write operation over the dummy variable to | ||
841 | * trigger the tracer | ||
842 | */ | ||
843 | if (ksym_selftest_dummy == 0) | ||
844 | ksym_selftest_dummy++; | ||
845 | |||
846 | /* stop the tracing. */ | ||
847 | tracing_stop(); | ||
848 | /* check the trace buffer */ | ||
849 | ret = trace_test_buffer(tr, &count); | ||
850 | trace->reset(tr); | ||
851 | tracing_start(); | ||
852 | |||
853 | /* read & write operations - one each is performed on the dummy variable | ||
854 | * triggering two entries in the trace buffer | ||
855 | */ | ||
856 | if (!ret && count != 2) { | ||
857 | printk(KERN_CONT "Ksym tracer startup test failed"); | ||
858 | ret = -1; | ||
859 | } | ||
860 | |||
861 | ret_path: | ||
862 | return ret; | ||
863 | } | ||
864 | #endif /* CONFIG_KSYM_TRACER */ | ||
865 | |||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 51213b0aa81b..9189cbe86079 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -477,11 +477,12 @@ static int sys_prof_refcount_exit; | |||
477 | static void prof_syscall_enter(struct pt_regs *regs, long id) | 477 | static void prof_syscall_enter(struct pt_regs *regs, long id) |
478 | { | 478 | { |
479 | struct syscall_metadata *sys_data; | 479 | struct syscall_metadata *sys_data; |
480 | struct perf_trace_buf *trace_buf; | ||
481 | struct syscall_trace_enter *rec; | 480 | struct syscall_trace_enter *rec; |
482 | unsigned long flags; | 481 | unsigned long flags; |
482 | char *trace_buf; | ||
483 | char *raw_data; | 483 | char *raw_data; |
484 | int syscall_nr; | 484 | int syscall_nr; |
485 | int rctx; | ||
485 | int size; | 486 | int size; |
486 | int cpu; | 487 | int cpu; |
487 | 488 | ||
@@ -505,28 +506,18 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
505 | /* Protect the per cpu buffer, begin the rcu read side */ | 506 | /* Protect the per cpu buffer, begin the rcu read side */ |
506 | local_irq_save(flags); | 507 | local_irq_save(flags); |
507 | 508 | ||
509 | rctx = perf_swevent_get_recursion_context(); | ||
510 | if (rctx < 0) | ||
511 | goto end_recursion; | ||
512 | |||
508 | cpu = smp_processor_id(); | 513 | cpu = smp_processor_id(); |
509 | 514 | ||
510 | if (in_nmi()) | 515 | trace_buf = rcu_dereference(perf_trace_buf); |
511 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
512 | else | ||
513 | trace_buf = rcu_dereference(perf_trace_buf); | ||
514 | 516 | ||
515 | if (!trace_buf) | 517 | if (!trace_buf) |
516 | goto end; | 518 | goto end; |
517 | 519 | ||
518 | trace_buf = per_cpu_ptr(trace_buf, cpu); | 520 | raw_data = per_cpu_ptr(trace_buf, cpu); |
519 | |||
520 | if (trace_buf->recursion++) | ||
521 | goto end_recursion; | ||
522 | |||
523 | /* | ||
524 | * Make recursion update visible before entering perf_tp_event | ||
525 | * so that we protect from perf recursions. | ||
526 | */ | ||
527 | barrier(); | ||
528 | |||
529 | raw_data = trace_buf->buf; | ||
530 | 521 | ||
531 | /* zero the dead bytes from align to not leak stack to user */ | 522 | /* zero the dead bytes from align to not leak stack to user */ |
532 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 523 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
@@ -539,9 +530,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
539 | (unsigned long *)&rec->args); | 530 | (unsigned long *)&rec->args); |
540 | perf_tp_event(sys_data->enter_id, 0, 1, rec, size); | 531 | perf_tp_event(sys_data->enter_id, 0, 1, rec, size); |
541 | 532 | ||
542 | end_recursion: | ||
543 | trace_buf->recursion--; | ||
544 | end: | 533 | end: |
534 | perf_swevent_put_recursion_context(rctx); | ||
535 | end_recursion: | ||
545 | local_irq_restore(flags); | 536 | local_irq_restore(flags); |
546 | } | 537 | } |
547 | 538 | ||
@@ -588,10 +579,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
588 | { | 579 | { |
589 | struct syscall_metadata *sys_data; | 580 | struct syscall_metadata *sys_data; |
590 | struct syscall_trace_exit *rec; | 581 | struct syscall_trace_exit *rec; |
591 | struct perf_trace_buf *trace_buf; | ||
592 | unsigned long flags; | 582 | unsigned long flags; |
593 | int syscall_nr; | 583 | int syscall_nr; |
584 | char *trace_buf; | ||
594 | char *raw_data; | 585 | char *raw_data; |
586 | int rctx; | ||
595 | int size; | 587 | int size; |
596 | int cpu; | 588 | int cpu; |
597 | 589 | ||
@@ -617,28 +609,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
617 | 609 | ||
618 | /* Protect the per cpu buffer, begin the rcu read side */ | 610 | /* Protect the per cpu buffer, begin the rcu read side */ |
619 | local_irq_save(flags); | 611 | local_irq_save(flags); |
612 | |||
613 | rctx = perf_swevent_get_recursion_context(); | ||
614 | if (rctx < 0) | ||
615 | goto end_recursion; | ||
616 | |||
620 | cpu = smp_processor_id(); | 617 | cpu = smp_processor_id(); |
621 | 618 | ||
622 | if (in_nmi()) | 619 | trace_buf = rcu_dereference(perf_trace_buf); |
623 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
624 | else | ||
625 | trace_buf = rcu_dereference(perf_trace_buf); | ||
626 | 620 | ||
627 | if (!trace_buf) | 621 | if (!trace_buf) |
628 | goto end; | 622 | goto end; |
629 | 623 | ||
630 | trace_buf = per_cpu_ptr(trace_buf, cpu); | 624 | raw_data = per_cpu_ptr(trace_buf, cpu); |
631 | |||
632 | if (trace_buf->recursion++) | ||
633 | goto end_recursion; | ||
634 | |||
635 | /* | ||
636 | * Make recursion update visible before entering perf_tp_event | ||
637 | * so that we protect from perf recursions. | ||
638 | */ | ||
639 | barrier(); | ||
640 | |||
641 | raw_data = trace_buf->buf; | ||
642 | 625 | ||
643 | /* zero the dead bytes from align to not leak stack to user */ | 626 | /* zero the dead bytes from align to not leak stack to user */ |
644 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 627 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
@@ -652,9 +635,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
652 | 635 | ||
653 | perf_tp_event(sys_data->exit_id, 0, 1, rec, size); | 636 | perf_tp_event(sys_data->exit_id, 0, 1, rec, size); |
654 | 637 | ||
655 | end_recursion: | ||
656 | trace_buf->recursion--; | ||
657 | end: | 638 | end: |
639 | perf_swevent_put_recursion_context(rctx); | ||
640 | end_recursion: | ||
658 | local_irq_restore(flags); | 641 | local_irq_restore(flags); |
659 | } | 642 | } |
660 | 643 | ||
diff --git a/samples/Kconfig b/samples/Kconfig index b92bde3c6a89..e4be84ac3d38 100644 --- a/samples/Kconfig +++ b/samples/Kconfig | |||
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES | |||
40 | default m | 40 | default m |
41 | depends on SAMPLE_KPROBES && KRETPROBES | 41 | depends on SAMPLE_KPROBES && KRETPROBES |
42 | 42 | ||
43 | config SAMPLE_HW_BREAKPOINT | ||
44 | tristate "Build kernel hardware breakpoint examples -- loadable module only" | ||
45 | depends on HAVE_HW_BREAKPOINT && m | ||
46 | help | ||
47 | This builds kernel hardware breakpoint example modules. | ||
48 | |||
43 | endif # SAMPLES | 49 | endif # SAMPLES |
44 | 50 | ||
diff --git a/samples/Makefile b/samples/Makefile index 43343a03b1f4..0f15e6d77fd6 100644 --- a/samples/Makefile +++ b/samples/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | # Makefile for Linux samples code | 1 | # Makefile for Linux samples code |
2 | 2 | ||
3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ | 3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \ |
4 | hw_breakpoint/ | ||
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile new file mode 100644 index 000000000000..0f5c31c2fc47 --- /dev/null +++ b/samples/hw_breakpoint/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o | |||
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c new file mode 100644 index 000000000000..95063818bcf4 --- /dev/null +++ b/samples/hw_breakpoint/data_breakpoint.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * usage: insmod data_breakpoint.ko ksym=<ksym_name> | ||
19 | * | ||
20 | * This file is a kernel module that places a breakpoint over ksym_name kernel | ||
21 | * variable using Hardware Breakpoint register. The corresponding handler which | ||
22 | * prints a backtrace is invoked everytime a write operation is performed on | ||
23 | * that variable. | ||
24 | * | ||
25 | * Copyright (C) IBM Corporation, 2009 | ||
26 | * | ||
27 | * Author: K.Prasad <prasad@linux.vnet.ibm.com> | ||
28 | */ | ||
29 | #include <linux/module.h> /* Needed by all modules */ | ||
30 | #include <linux/kernel.h> /* Needed for KERN_INFO */ | ||
31 | #include <linux/init.h> /* Needed for the macros */ | ||
32 | #include <linux/kallsyms.h> | ||
33 | |||
34 | #include <linux/perf_event.h> | ||
35 | #include <linux/hw_breakpoint.h> | ||
36 | |||
37 | struct perf_event **sample_hbp; | ||
38 | |||
39 | static char ksym_name[KSYM_NAME_LEN] = "pid_max"; | ||
40 | module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); | ||
41 | MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" | ||
42 | " write operations on the kernel symbol"); | ||
43 | |||
44 | static void sample_hbp_handler(struct perf_event *temp, void *data) | ||
45 | { | ||
46 | printk(KERN_INFO "%s value is changed\n", ksym_name); | ||
47 | dump_stack(); | ||
48 | printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); | ||
49 | } | ||
50 | |||
51 | static int __init hw_break_module_init(void) | ||
52 | { | ||
53 | int ret; | ||
54 | unsigned long addr; | ||
55 | |||
56 | addr = kallsyms_lookup_name(ksym_name); | ||
57 | |||
58 | sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4, | ||
59 | HW_BREAKPOINT_W | HW_BREAKPOINT_R, | ||
60 | sample_hbp_handler, true); | ||
61 | if (IS_ERR(sample_hbp)) { | ||
62 | ret = PTR_ERR(sample_hbp); | ||
63 | goto fail; | ||
64 | } else if (!sample_hbp) { | ||
65 | ret = -EINVAL; | ||
66 | goto fail; | ||
67 | } | ||
68 | |||
69 | printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); | ||
70 | |||
71 | return 0; | ||
72 | |||
73 | fail: | ||
74 | printk(KERN_INFO "Breakpoint registration failed\n"); | ||
75 | |||
76 | return ret; | ||
77 | } | ||
78 | |||
79 | static void __exit hw_break_module_exit(void) | ||
80 | { | ||
81 | unregister_wide_hw_breakpoint(sample_hbp); | ||
82 | printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); | ||
83 | } | ||
84 | |||
85 | module_init(hw_break_module_init); | ||
86 | module_exit(hw_break_module_exit); | ||
87 | |||
88 | MODULE_LICENSE("GPL"); | ||
89 | MODULE_AUTHOR("K.Prasad"); | ||
90 | MODULE_DESCRIPTION("ksym breakpoint"); | ||
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt new file mode 100644 index 000000000000..44b0ce35c28a --- /dev/null +++ b/tools/perf/Documentation/perf-kmem.txt | |||
@@ -0,0 +1,44 @@ | |||
1 | perf-kmem(1) | ||
2 | ============== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-kmem - Tool to trace/measure kernel memory (slab) properties | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf kmem' {record} [<options>] | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | There are two variants of perf kmem: | ||
16 | |||
17 | 'perf kmem record <command>' to record the kmem events | ||
18 | of an arbitrary workload. | ||
19 | |||
20 | 'perf kmem' to report kernel memory statistics. | ||
21 | |||
22 | OPTIONS | ||
23 | ------- | ||
24 | -i <file>:: | ||
25 | --input=<file>:: | ||
26 | Select the input file (default: perf.data) | ||
27 | |||
28 | --stat=<caller|alloc>:: | ||
29 | Select per callsite or per allocation statistics | ||
30 | |||
31 | -s <key[,key2...]>:: | ||
32 | --sort=<key[,key2...]>:: | ||
33 | Sort the output (default: frag,hit,bytes) | ||
34 | |||
35 | -l <num>:: | ||
36 | --line=<num>:: | ||
37 | Print n lines only | ||
38 | |||
39 | --raw-ip:: | ||
40 | Print raw ip instead of symbol | ||
41 | |||
42 | SEE ALSO | ||
43 | -------- | ||
44 | linkperf:perf-record[1] | ||
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 0ff23de9e453..fc46c0b40f6e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -26,11 +26,19 @@ OPTIONS | |||
26 | 26 | ||
27 | -e:: | 27 | -e:: |
28 | --event=:: | 28 | --event=:: |
29 | Select the PMU event. Selection can be a symbolic event name | 29 | Select the PMU event. Selection can be: |
30 | (use 'perf list' to list all events) or a raw PMU | ||
31 | event (eventsel+umask) in the form of rNNN where NNN is a | ||
32 | hexadecimal event descriptor. | ||
33 | 30 | ||
31 | - a symbolic event name (use 'perf list' to list all events) | ||
32 | |||
33 | - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a | ||
34 | hexadecimal event descriptor. | ||
35 | |||
36 | - a hardware breakpoint event in the form of '\mem:addr[:access]' | ||
37 | where addr is the address in memory you want to break in. | ||
38 | Access is the memory access type (read, write, execute) it can | ||
39 | be passed as follows: '\mem:addr[:[r][w][x]]'. | ||
40 | be passed as follows: '\mem:addr[:[r][w][x]]'. | ||
41 | If you want to profile read-write accesses at 0x1000, just set | ||
42 | 'mem:0x1000:rw'. | ||
35 | System-wide collection. | 43 | System-wide collection. |
36 | 44 | ||
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 53e663a5fa2f..f1537a94a05f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -2,6 +2,7 @@ | |||
2 | all:: | 2 | all:: |
3 | 3 | ||
4 | # Define V=1 to have a more verbose compile. | 4 | # Define V=1 to have a more verbose compile. |
5 | # Define V=2 to have an even more verbose compile. | ||
5 | # | 6 | # |
6 | # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() | 7 | # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() |
7 | # or vsnprintf() return -1 instead of number of characters which would | 8 | # or vsnprintf() return -1 instead of number of characters which would |
@@ -147,6 +148,8 @@ all:: | |||
147 | # broken, or spawning external process is slower than built-in grep perf has). | 148 | # broken, or spawning external process is slower than built-in grep perf has). |
148 | # | 149 | # |
149 | # Define LDFLAGS=-static to build a static binary. | 150 | # Define LDFLAGS=-static to build a static binary. |
151 | # | ||
152 | # Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. | ||
150 | 153 | ||
151 | PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | 154 | PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE |
152 | @$(SHELL_PATH) util/PERF-VERSION-GEN | 155 | @$(SHELL_PATH) util/PERF-VERSION-GEN |
@@ -159,22 +162,6 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') | |||
159 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') | 162 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') |
160 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') | 163 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') |
161 | 164 | ||
162 | # | ||
163 | # Add -m32 for cross-builds: | ||
164 | # | ||
165 | ifdef NO_64BIT | ||
166 | MBITS := -m32 | ||
167 | else | ||
168 | # | ||
169 | # If we're on a 64-bit kernel (except ia64), use -m64: | ||
170 | # | ||
171 | ifneq ($(uname_M),ia64) | ||
172 | ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) | ||
173 | MBITS := -m64 | ||
174 | endif | ||
175 | endif | ||
176 | endif | ||
177 | |||
178 | # CFLAGS and LDFLAGS are for the users to override from the command line. | 165 | # CFLAGS and LDFLAGS are for the users to override from the command line. |
179 | 166 | ||
180 | # | 167 | # |
@@ -211,7 +198,7 @@ ifndef PERF_DEBUG | |||
211 | CFLAGS_OPTIMIZE = -O6 | 198 | CFLAGS_OPTIMIZE = -O6 |
212 | endif | 199 | endif |
213 | 200 | ||
214 | CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) | 201 | CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) |
215 | EXTLIBS = -lpthread -lrt -lelf -lm | 202 | EXTLIBS = -lpthread -lrt -lelf -lm |
216 | ALL_CFLAGS = $(CFLAGS) | 203 | ALL_CFLAGS = $(CFLAGS) |
217 | ALL_LDFLAGS = $(LDFLAGS) | 204 | ALL_LDFLAGS = $(LDFLAGS) |
@@ -263,7 +250,7 @@ PTHREAD_LIBS = -lpthread | |||
263 | # explicitly what architecture to check for. Fix this up for yours.. | 250 | # explicitly what architecture to check for. Fix this up for yours.. |
264 | SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ | 251 | SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ |
265 | 252 | ||
266 | ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null >/dev/null 2>&1 && echo y"), y) | 253 | ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y) |
267 | CFLAGS := $(CFLAGS) -fstack-protector-all | 254 | CFLAGS := $(CFLAGS) -fstack-protector-all |
268 | endif | 255 | endif |
269 | 256 | ||
@@ -445,9 +432,15 @@ BUILTIN_OBJS += builtin-timechart.o | |||
445 | BUILTIN_OBJS += builtin-top.o | 432 | BUILTIN_OBJS += builtin-top.o |
446 | BUILTIN_OBJS += builtin-trace.o | 433 | BUILTIN_OBJS += builtin-trace.o |
447 | BUILTIN_OBJS += builtin-probe.o | 434 | BUILTIN_OBJS += builtin-probe.o |
435 | BUILTIN_OBJS += builtin-kmem.o | ||
448 | 436 | ||
449 | PERFLIBS = $(LIB_FILE) | 437 | PERFLIBS = $(LIB_FILE) |
450 | 438 | ||
439 | ifeq ($(V), 2) | ||
440 | QUIET_STDERR = ">/dev/null" | ||
441 | else | ||
442 | QUIET_STDERR = ">/dev/null 2>&1" | ||
443 | endif | ||
451 | # | 444 | # |
452 | # Platform specific tweaks | 445 | # Platform specific tweaks |
453 | # | 446 | # |
@@ -475,19 +468,19 @@ ifeq ($(uname_S),Darwin) | |||
475 | PTHREAD_LIBS = | 468 | PTHREAD_LIBS = |
476 | endif | 469 | endif |
477 | 470 | ||
478 | ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) | 471 | ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) |
479 | ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) | 472 | ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) |
480 | msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); | 473 | msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); |
481 | endif | 474 | endif |
482 | 475 | ||
483 | ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) | 476 | ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) |
484 | BASIC_CFLAGS += -DLIBELF_NO_MMAP | 477 | BASIC_CFLAGS += -DLIBELF_NO_MMAP |
485 | endif | 478 | endif |
486 | else | 479 | else |
487 | msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); | 480 | msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); |
488 | endif | 481 | endif |
489 | 482 | ||
490 | ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) | 483 | ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) |
491 | msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); | 484 | msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); |
492 | BASIC_CFLAGS += -DNO_LIBDWARF | 485 | BASIC_CFLAGS += -DNO_LIBDWARF |
493 | else | 486 | else |
@@ -499,25 +492,25 @@ endif | |||
499 | ifdef NO_DEMANGLE | 492 | ifdef NO_DEMANGLE |
500 | BASIC_CFLAGS += -DNO_DEMANGLE | 493 | BASIC_CFLAGS += -DNO_DEMANGLE |
501 | else | 494 | else |
502 | has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd > /dev/null 2>&1 && echo y") | 495 | has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") |
503 | 496 | ||
504 | ifeq ($(has_bfd),y) | 497 | ifeq ($(has_bfd),y) |
505 | EXTLIBS += -lbfd | 498 | EXTLIBS += -lbfd |
506 | else | 499 | else |
507 | has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty > /dev/null 2>&1 && echo y") | 500 | has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") |
508 | ifeq ($(has_bfd_iberty),y) | 501 | ifeq ($(has_bfd_iberty),y) |
509 | EXTLIBS += -lbfd -liberty | 502 | EXTLIBS += -lbfd -liberty |
510 | else | 503 | else |
511 | has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") | 504 | has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") |
512 | ifeq ($(has_bfd_iberty_z),y) | 505 | ifeq ($(has_bfd_iberty_z),y) |
513 | EXTLIBS += -lbfd -liberty -lz | 506 | EXTLIBS += -lbfd -liberty -lz |
514 | else | 507 | else |
515 | has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty > /dev/null 2>&1 && echo y") | 508 | has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") |
516 | ifeq ($(has_cplus_demangle),y) | 509 | ifeq ($(has_cplus_demangle),y) |
517 | EXTLIBS += -liberty | 510 | EXTLIBS += -liberty |
518 | BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE | 511 | BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE |
519 | else | 512 | else |
520 | msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) | 513 | msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) |
521 | BASIC_CFLAGS += -DNO_DEMANGLE | 514 | BASIC_CFLAGS += -DNO_DEMANGLE |
522 | endif | 515 | endif |
523 | endif | 516 | endif |
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 77d50a6d6802..6b13a1ecf1e7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -33,9 +33,11 @@ static int input; | |||
33 | static int full_paths; | 33 | static int full_paths; |
34 | 34 | ||
35 | static int print_line; | 35 | static int print_line; |
36 | static bool use_modules; | ||
36 | 37 | ||
37 | static unsigned long page_size; | 38 | static unsigned long page_size; |
38 | static unsigned long mmap_window = 32; | 39 | static unsigned long mmap_window = 32; |
40 | const char *vmlinux_name; | ||
39 | 41 | ||
40 | struct sym_hist { | 42 | struct sym_hist { |
41 | u64 sum; | 43 | u64 sum; |
@@ -156,7 +158,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) | |||
156 | 158 | ||
157 | if (event->header.misc & PERF_RECORD_MISC_KERNEL) { | 159 | if (event->header.misc & PERF_RECORD_MISC_KERNEL) { |
158 | level = 'k'; | 160 | level = 'k'; |
159 | sym = kernel_maps__find_symbol(ip, &map); | 161 | sym = kernel_maps__find_symbol(ip, &map, symbol_filter); |
160 | dump_printf(" ...... dso: %s\n", | 162 | dump_printf(" ...... dso: %s\n", |
161 | map ? map->dso->long_name : "<not found>"); | 163 | map ? map->dso->long_name : "<not found>"); |
162 | } else if (event->header.misc & PERF_RECORD_MISC_USER) { | 164 | } else if (event->header.misc & PERF_RECORD_MISC_USER) { |
@@ -636,9 +638,9 @@ static int __cmd_annotate(void) | |||
636 | exit(0); | 638 | exit(0); |
637 | } | 639 | } |
638 | 640 | ||
639 | if (load_kernel(symbol_filter) < 0) { | 641 | if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) { |
640 | perror("failed to load kernel symbols"); | 642 | pr_err("failed to create kernel maps for symbol resolution\b"); |
641 | return EXIT_FAILURE; | 643 | return -1; |
642 | } | 644 | } |
643 | 645 | ||
644 | remap: | 646 | remap: |
@@ -742,7 +744,7 @@ static const struct option options[] = { | |||
742 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 744 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
743 | "dump raw trace in ASCII"), | 745 | "dump raw trace in ASCII"), |
744 | OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), | 746 | OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), |
745 | OPT_BOOLEAN('m', "modules", &modules, | 747 | OPT_BOOLEAN('m', "modules", &use_modules, |
746 | "load module symbols - WARNING: use only with -k and LIVE kernel"), | 748 | "load module symbols - WARNING: use only with -k and LIVE kernel"), |
747 | OPT_BOOLEAN('l', "print-line", &print_line, | 749 | OPT_BOOLEAN('l', "print-line", &print_line, |
748 | "print matching source lines (may be slow)"), | 750 | "print matching source lines (may be slow)"), |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c new file mode 100644 index 000000000000..173d6db42ecb --- /dev/null +++ b/tools/perf/builtin-kmem.c | |||
@@ -0,0 +1,833 @@ | |||
1 | #include "builtin.h" | ||
2 | #include "perf.h" | ||
3 | |||
4 | #include "util/util.h" | ||
5 | #include "util/cache.h" | ||
6 | #include "util/symbol.h" | ||
7 | #include "util/thread.h" | ||
8 | #include "util/header.h" | ||
9 | |||
10 | #include "util/parse-options.h" | ||
11 | #include "util/trace-event.h" | ||
12 | |||
13 | #include "util/debug.h" | ||
14 | #include "util/data_map.h" | ||
15 | |||
16 | #include <linux/rbtree.h> | ||
17 | |||
18 | struct alloc_stat; | ||
19 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); | ||
20 | |||
21 | static char const *input_name = "perf.data"; | ||
22 | |||
23 | static struct perf_header *header; | ||
24 | static u64 sample_type; | ||
25 | |||
26 | static int alloc_flag; | ||
27 | static int caller_flag; | ||
28 | |||
29 | static int alloc_lines = -1; | ||
30 | static int caller_lines = -1; | ||
31 | |||
32 | static bool raw_ip; | ||
33 | |||
34 | static char default_sort_order[] = "frag,hit,bytes"; | ||
35 | |||
36 | static char *cwd; | ||
37 | static int cwdlen; | ||
38 | |||
39 | static int *cpunode_map; | ||
40 | static int max_cpu_num; | ||
41 | |||
42 | struct alloc_stat { | ||
43 | u64 call_site; | ||
44 | u64 ptr; | ||
45 | u64 bytes_req; | ||
46 | u64 bytes_alloc; | ||
47 | u32 hit; | ||
48 | u32 pingpong; | ||
49 | |||
50 | short alloc_cpu; | ||
51 | |||
52 | struct rb_node node; | ||
53 | }; | ||
54 | |||
55 | static struct rb_root root_alloc_stat; | ||
56 | static struct rb_root root_alloc_sorted; | ||
57 | static struct rb_root root_caller_stat; | ||
58 | static struct rb_root root_caller_sorted; | ||
59 | |||
60 | static unsigned long total_requested, total_allocated; | ||
61 | static unsigned long nr_allocs, nr_cross_allocs; | ||
62 | |||
63 | struct raw_event_sample { | ||
64 | u32 size; | ||
65 | char data[0]; | ||
66 | }; | ||
67 | |||
68 | #define PATH_SYS_NODE "/sys/devices/system/node" | ||
69 | |||
70 | static void init_cpunode_map(void) | ||
71 | { | ||
72 | FILE *fp; | ||
73 | int i; | ||
74 | |||
75 | fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); | ||
76 | if (!fp) { | ||
77 | max_cpu_num = 4096; | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | if (fscanf(fp, "%d", &max_cpu_num) < 1) | ||
82 | die("Failed to read 'kernel_max' from sysfs"); | ||
83 | max_cpu_num++; | ||
84 | |||
85 | cpunode_map = calloc(max_cpu_num, sizeof(int)); | ||
86 | if (!cpunode_map) | ||
87 | die("calloc"); | ||
88 | for (i = 0; i < max_cpu_num; i++) | ||
89 | cpunode_map[i] = -1; | ||
90 | fclose(fp); | ||
91 | } | ||
92 | |||
93 | static void setup_cpunode_map(void) | ||
94 | { | ||
95 | struct dirent *dent1, *dent2; | ||
96 | DIR *dir1, *dir2; | ||
97 | unsigned int cpu, mem; | ||
98 | char buf[PATH_MAX]; | ||
99 | |||
100 | init_cpunode_map(); | ||
101 | |||
102 | dir1 = opendir(PATH_SYS_NODE); | ||
103 | if (!dir1) | ||
104 | return; | ||
105 | |||
106 | while (true) { | ||
107 | dent1 = readdir(dir1); | ||
108 | if (!dent1) | ||
109 | break; | ||
110 | |||
111 | if (sscanf(dent1->d_name, "node%u", &mem) < 1) | ||
112 | continue; | ||
113 | |||
114 | snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); | ||
115 | dir2 = opendir(buf); | ||
116 | if (!dir2) | ||
117 | continue; | ||
118 | while (true) { | ||
119 | dent2 = readdir(dir2); | ||
120 | if (!dent2) | ||
121 | break; | ||
122 | if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) | ||
123 | continue; | ||
124 | cpunode_map[cpu] = mem; | ||
125 | } | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static int | ||
130 | process_comm_event(event_t *event, unsigned long offset, unsigned long head) | ||
131 | { | ||
132 | struct thread *thread = threads__findnew(event->comm.pid); | ||
133 | |||
134 | dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", | ||
135 | (void *)(offset + head), | ||
136 | (void *)(long)(event->header.size), | ||
137 | event->comm.comm, event->comm.pid); | ||
138 | |||
139 | if (thread == NULL || | ||
140 | thread__set_comm(thread, event->comm.comm)) { | ||
141 | dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); | ||
142 | return -1; | ||
143 | } | ||
144 | |||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, | ||
149 | int bytes_req, int bytes_alloc, int cpu) | ||
150 | { | ||
151 | struct rb_node **node = &root_alloc_stat.rb_node; | ||
152 | struct rb_node *parent = NULL; | ||
153 | struct alloc_stat *data = NULL; | ||
154 | |||
155 | while (*node) { | ||
156 | parent = *node; | ||
157 | data = rb_entry(*node, struct alloc_stat, node); | ||
158 | |||
159 | if (ptr > data->ptr) | ||
160 | node = &(*node)->rb_right; | ||
161 | else if (ptr < data->ptr) | ||
162 | node = &(*node)->rb_left; | ||
163 | else | ||
164 | break; | ||
165 | } | ||
166 | |||
167 | if (data && data->ptr == ptr) { | ||
168 | data->hit++; | ||
169 | data->bytes_req += bytes_req; | ||
170 | data->bytes_alloc += bytes_req; | ||
171 | } else { | ||
172 | data = malloc(sizeof(*data)); | ||
173 | if (!data) | ||
174 | die("malloc"); | ||
175 | data->ptr = ptr; | ||
176 | data->pingpong = 0; | ||
177 | data->hit = 1; | ||
178 | data->bytes_req = bytes_req; | ||
179 | data->bytes_alloc = bytes_alloc; | ||
180 | |||
181 | rb_link_node(&data->node, parent, node); | ||
182 | rb_insert_color(&data->node, &root_alloc_stat); | ||
183 | } | ||
184 | data->call_site = call_site; | ||
185 | data->alloc_cpu = cpu; | ||
186 | } | ||
187 | |||
188 | static void insert_caller_stat(unsigned long call_site, | ||
189 | int bytes_req, int bytes_alloc) | ||
190 | { | ||
191 | struct rb_node **node = &root_caller_stat.rb_node; | ||
192 | struct rb_node *parent = NULL; | ||
193 | struct alloc_stat *data = NULL; | ||
194 | |||
195 | while (*node) { | ||
196 | parent = *node; | ||
197 | data = rb_entry(*node, struct alloc_stat, node); | ||
198 | |||
199 | if (call_site > data->call_site) | ||
200 | node = &(*node)->rb_right; | ||
201 | else if (call_site < data->call_site) | ||
202 | node = &(*node)->rb_left; | ||
203 | else | ||
204 | break; | ||
205 | } | ||
206 | |||
207 | if (data && data->call_site == call_site) { | ||
208 | data->hit++; | ||
209 | data->bytes_req += bytes_req; | ||
210 | data->bytes_alloc += bytes_req; | ||
211 | } else { | ||
212 | data = malloc(sizeof(*data)); | ||
213 | if (!data) | ||
214 | die("malloc"); | ||
215 | data->call_site = call_site; | ||
216 | data->pingpong = 0; | ||
217 | data->hit = 1; | ||
218 | data->bytes_req = bytes_req; | ||
219 | data->bytes_alloc = bytes_alloc; | ||
220 | |||
221 | rb_link_node(&data->node, parent, node); | ||
222 | rb_insert_color(&data->node, &root_caller_stat); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | static void process_alloc_event(struct raw_event_sample *raw, | ||
227 | struct event *event, | ||
228 | int cpu, | ||
229 | u64 timestamp __used, | ||
230 | struct thread *thread __used, | ||
231 | int node) | ||
232 | { | ||
233 | unsigned long call_site; | ||
234 | unsigned long ptr; | ||
235 | int bytes_req; | ||
236 | int bytes_alloc; | ||
237 | int node1, node2; | ||
238 | |||
239 | ptr = raw_field_value(event, "ptr", raw->data); | ||
240 | call_site = raw_field_value(event, "call_site", raw->data); | ||
241 | bytes_req = raw_field_value(event, "bytes_req", raw->data); | ||
242 | bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); | ||
243 | |||
244 | insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); | ||
245 | insert_caller_stat(call_site, bytes_req, bytes_alloc); | ||
246 | |||
247 | total_requested += bytes_req; | ||
248 | total_allocated += bytes_alloc; | ||
249 | |||
250 | if (node) { | ||
251 | node1 = cpunode_map[cpu]; | ||
252 | node2 = raw_field_value(event, "node", raw->data); | ||
253 | if (node1 != node2) | ||
254 | nr_cross_allocs++; | ||
255 | } | ||
256 | nr_allocs++; | ||
257 | } | ||
258 | |||
259 | static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); | ||
260 | static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); | ||
261 | |||
262 | static struct alloc_stat *search_alloc_stat(unsigned long ptr, | ||
263 | unsigned long call_site, | ||
264 | struct rb_root *root, | ||
265 | sort_fn_t sort_fn) | ||
266 | { | ||
267 | struct rb_node *node = root->rb_node; | ||
268 | struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; | ||
269 | |||
270 | while (node) { | ||
271 | struct alloc_stat *data; | ||
272 | int cmp; | ||
273 | |||
274 | data = rb_entry(node, struct alloc_stat, node); | ||
275 | |||
276 | cmp = sort_fn(&key, data); | ||
277 | if (cmp < 0) | ||
278 | node = node->rb_left; | ||
279 | else if (cmp > 0) | ||
280 | node = node->rb_right; | ||
281 | else | ||
282 | return data; | ||
283 | } | ||
284 | return NULL; | ||
285 | } | ||
286 | |||
287 | static void process_free_event(struct raw_event_sample *raw, | ||
288 | struct event *event, | ||
289 | int cpu, | ||
290 | u64 timestamp __used, | ||
291 | struct thread *thread __used) | ||
292 | { | ||
293 | unsigned long ptr; | ||
294 | struct alloc_stat *s_alloc, *s_caller; | ||
295 | |||
296 | ptr = raw_field_value(event, "ptr", raw->data); | ||
297 | |||
298 | s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); | ||
299 | if (!s_alloc) | ||
300 | return; | ||
301 | |||
302 | if (cpu != s_alloc->alloc_cpu) { | ||
303 | s_alloc->pingpong++; | ||
304 | |||
305 | s_caller = search_alloc_stat(0, s_alloc->call_site, | ||
306 | &root_caller_stat, callsite_cmp); | ||
307 | assert(s_caller); | ||
308 | s_caller->pingpong++; | ||
309 | } | ||
310 | s_alloc->alloc_cpu = -1; | ||
311 | } | ||
312 | |||
313 | static void | ||
314 | process_raw_event(event_t *raw_event __used, void *more_data, | ||
315 | int cpu, u64 timestamp, struct thread *thread) | ||
316 | { | ||
317 | struct raw_event_sample *raw = more_data; | ||
318 | struct event *event; | ||
319 | int type; | ||
320 | |||
321 | type = trace_parse_common_type(raw->data); | ||
322 | event = trace_find_event(type); | ||
323 | |||
324 | if (!strcmp(event->name, "kmalloc") || | ||
325 | !strcmp(event->name, "kmem_cache_alloc")) { | ||
326 | process_alloc_event(raw, event, cpu, timestamp, thread, 0); | ||
327 | return; | ||
328 | } | ||
329 | |||
330 | if (!strcmp(event->name, "kmalloc_node") || | ||
331 | !strcmp(event->name, "kmem_cache_alloc_node")) { | ||
332 | process_alloc_event(raw, event, cpu, timestamp, thread, 1); | ||
333 | return; | ||
334 | } | ||
335 | |||
336 | if (!strcmp(event->name, "kfree") || | ||
337 | !strcmp(event->name, "kmem_cache_free")) { | ||
338 | process_free_event(raw, event, cpu, timestamp, thread); | ||
339 | return; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | static int | ||
344 | process_sample_event(event_t *event, unsigned long offset, unsigned long head) | ||
345 | { | ||
346 | u64 ip = event->ip.ip; | ||
347 | u64 timestamp = -1; | ||
348 | u32 cpu = -1; | ||
349 | u64 period = 1; | ||
350 | void *more_data = event->ip.__more_data; | ||
351 | struct thread *thread = threads__findnew(event->ip.pid); | ||
352 | |||
353 | if (sample_type & PERF_SAMPLE_TIME) { | ||
354 | timestamp = *(u64 *)more_data; | ||
355 | more_data += sizeof(u64); | ||
356 | } | ||
357 | |||
358 | if (sample_type & PERF_SAMPLE_CPU) { | ||
359 | cpu = *(u32 *)more_data; | ||
360 | more_data += sizeof(u32); | ||
361 | more_data += sizeof(u32); /* reserved */ | ||
362 | } | ||
363 | |||
364 | if (sample_type & PERF_SAMPLE_PERIOD) { | ||
365 | period = *(u64 *)more_data; | ||
366 | more_data += sizeof(u64); | ||
367 | } | ||
368 | |||
369 | dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", | ||
370 | (void *)(offset + head), | ||
371 | (void *)(long)(event->header.size), | ||
372 | event->header.misc, | ||
373 | event->ip.pid, event->ip.tid, | ||
374 | (void *)(long)ip, | ||
375 | (long long)period); | ||
376 | |||
377 | if (thread == NULL) { | ||
378 | pr_debug("problem processing %d event, skipping it.\n", | ||
379 | event->header.type); | ||
380 | return -1; | ||
381 | } | ||
382 | |||
383 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); | ||
384 | |||
385 | process_raw_event(event, more_data, cpu, timestamp, thread); | ||
386 | |||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static int sample_type_check(u64 type) | ||
391 | { | ||
392 | sample_type = type; | ||
393 | |||
394 | if (!(sample_type & PERF_SAMPLE_RAW)) { | ||
395 | fprintf(stderr, | ||
396 | "No trace sample to read. Did you call perf record " | ||
397 | "without -R?"); | ||
398 | return -1; | ||
399 | } | ||
400 | |||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | static struct perf_file_handler file_handler = { | ||
405 | .process_sample_event = process_sample_event, | ||
406 | .process_comm_event = process_comm_event, | ||
407 | .sample_type_check = sample_type_check, | ||
408 | }; | ||
409 | |||
410 | static int read_events(void) | ||
411 | { | ||
412 | register_idle_thread(); | ||
413 | register_perf_file_handler(&file_handler); | ||
414 | |||
415 | return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, | ||
416 | &cwdlen, &cwd); | ||
417 | } | ||
418 | |||
419 | static double fragmentation(unsigned long n_req, unsigned long n_alloc) | ||
420 | { | ||
421 | if (n_alloc == 0) | ||
422 | return 0.0; | ||
423 | else | ||
424 | return 100.0 - (100.0 * n_req / n_alloc); | ||
425 | } | ||
426 | |||
427 | static void __print_result(struct rb_root *root, int n_lines, int is_caller) | ||
428 | { | ||
429 | struct rb_node *next; | ||
430 | |||
431 | printf("%.102s\n", graph_dotted_line); | ||
432 | printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); | ||
433 | printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); | ||
434 | printf("%.102s\n", graph_dotted_line); | ||
435 | |||
436 | next = rb_first(root); | ||
437 | |||
438 | while (next && n_lines--) { | ||
439 | struct alloc_stat *data = rb_entry(next, struct alloc_stat, | ||
440 | node); | ||
441 | struct symbol *sym = NULL; | ||
442 | char buf[BUFSIZ]; | ||
443 | u64 addr; | ||
444 | |||
445 | if (is_caller) { | ||
446 | addr = data->call_site; | ||
447 | if (!raw_ip) | ||
448 | sym = kernel_maps__find_symbol(addr, | ||
449 | NULL, NULL); | ||
450 | } else | ||
451 | addr = data->ptr; | ||
452 | |||
453 | if (sym != NULL) | ||
454 | snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, | ||
455 | addr - sym->start); | ||
456 | else | ||
457 | snprintf(buf, sizeof(buf), "%#Lx", addr); | ||
458 | printf(" %-34s |", buf); | ||
459 | |||
460 | printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n", | ||
461 | (unsigned long long)data->bytes_alloc, | ||
462 | (unsigned long)data->bytes_alloc / data->hit, | ||
463 | (unsigned long long)data->bytes_req, | ||
464 | (unsigned long)data->bytes_req / data->hit, | ||
465 | (unsigned long)data->hit, | ||
466 | (unsigned long)data->pingpong, | ||
467 | fragmentation(data->bytes_req, data->bytes_alloc)); | ||
468 | |||
469 | next = rb_next(next); | ||
470 | } | ||
471 | |||
472 | if (n_lines == -1) | ||
473 | printf(" ... | ... | ... | ... | ... | ... \n"); | ||
474 | |||
475 | printf("%.102s\n", graph_dotted_line); | ||
476 | } | ||
477 | |||
478 | static void print_summary(void) | ||
479 | { | ||
480 | printf("\nSUMMARY\n=======\n"); | ||
481 | printf("Total bytes requested: %lu\n", total_requested); | ||
482 | printf("Total bytes allocated: %lu\n", total_allocated); | ||
483 | printf("Total bytes wasted on internal fragmentation: %lu\n", | ||
484 | total_allocated - total_requested); | ||
485 | printf("Internal fragmentation: %f%%\n", | ||
486 | fragmentation(total_requested, total_allocated)); | ||
487 | printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); | ||
488 | } | ||
489 | |||
490 | static void print_result(void) | ||
491 | { | ||
492 | if (caller_flag) | ||
493 | __print_result(&root_caller_sorted, caller_lines, 1); | ||
494 | if (alloc_flag) | ||
495 | __print_result(&root_alloc_sorted, alloc_lines, 0); | ||
496 | print_summary(); | ||
497 | } | ||
498 | |||
499 | struct sort_dimension { | ||
500 | const char name[20]; | ||
501 | sort_fn_t cmp; | ||
502 | struct list_head list; | ||
503 | }; | ||
504 | |||
505 | static LIST_HEAD(caller_sort); | ||
506 | static LIST_HEAD(alloc_sort); | ||
507 | |||
508 | static void sort_insert(struct rb_root *root, struct alloc_stat *data, | ||
509 | struct list_head *sort_list) | ||
510 | { | ||
511 | struct rb_node **new = &(root->rb_node); | ||
512 | struct rb_node *parent = NULL; | ||
513 | struct sort_dimension *sort; | ||
514 | |||
515 | while (*new) { | ||
516 | struct alloc_stat *this; | ||
517 | int cmp = 0; | ||
518 | |||
519 | this = rb_entry(*new, struct alloc_stat, node); | ||
520 | parent = *new; | ||
521 | |||
522 | list_for_each_entry(sort, sort_list, list) { | ||
523 | cmp = sort->cmp(data, this); | ||
524 | if (cmp) | ||
525 | break; | ||
526 | } | ||
527 | |||
528 | if (cmp > 0) | ||
529 | new = &((*new)->rb_left); | ||
530 | else | ||
531 | new = &((*new)->rb_right); | ||
532 | } | ||
533 | |||
534 | rb_link_node(&data->node, parent, new); | ||
535 | rb_insert_color(&data->node, root); | ||
536 | } | ||
537 | |||
538 | static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, | ||
539 | struct list_head *sort_list) | ||
540 | { | ||
541 | struct rb_node *node; | ||
542 | struct alloc_stat *data; | ||
543 | |||
544 | for (;;) { | ||
545 | node = rb_first(root); | ||
546 | if (!node) | ||
547 | break; | ||
548 | |||
549 | rb_erase(node, root); | ||
550 | data = rb_entry(node, struct alloc_stat, node); | ||
551 | sort_insert(root_sorted, data, sort_list); | ||
552 | } | ||
553 | } | ||
554 | |||
555 | static void sort_result(void) | ||
556 | { | ||
557 | __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); | ||
558 | __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); | ||
559 | } | ||
560 | |||
561 | static int __cmd_kmem(void) | ||
562 | { | ||
563 | setup_pager(); | ||
564 | read_events(); | ||
565 | sort_result(); | ||
566 | print_result(); | ||
567 | |||
568 | return 0; | ||
569 | } | ||
570 | |||
571 | static const char * const kmem_usage[] = { | ||
572 | "perf kmem [<options>] {record}", | ||
573 | NULL | ||
574 | }; | ||
575 | |||
576 | static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
577 | { | ||
578 | if (l->ptr < r->ptr) | ||
579 | return -1; | ||
580 | else if (l->ptr > r->ptr) | ||
581 | return 1; | ||
582 | return 0; | ||
583 | } | ||
584 | |||
585 | static struct sort_dimension ptr_sort_dimension = { | ||
586 | .name = "ptr", | ||
587 | .cmp = ptr_cmp, | ||
588 | }; | ||
589 | |||
590 | static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
591 | { | ||
592 | if (l->call_site < r->call_site) | ||
593 | return -1; | ||
594 | else if (l->call_site > r->call_site) | ||
595 | return 1; | ||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | static struct sort_dimension callsite_sort_dimension = { | ||
600 | .name = "callsite", | ||
601 | .cmp = callsite_cmp, | ||
602 | }; | ||
603 | |||
604 | static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
605 | { | ||
606 | if (l->hit < r->hit) | ||
607 | return -1; | ||
608 | else if (l->hit > r->hit) | ||
609 | return 1; | ||
610 | return 0; | ||
611 | } | ||
612 | |||
613 | static struct sort_dimension hit_sort_dimension = { | ||
614 | .name = "hit", | ||
615 | .cmp = hit_cmp, | ||
616 | }; | ||
617 | |||
618 | static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
619 | { | ||
620 | if (l->bytes_alloc < r->bytes_alloc) | ||
621 | return -1; | ||
622 | else if (l->bytes_alloc > r->bytes_alloc) | ||
623 | return 1; | ||
624 | return 0; | ||
625 | } | ||
626 | |||
627 | static struct sort_dimension bytes_sort_dimension = { | ||
628 | .name = "bytes", | ||
629 | .cmp = bytes_cmp, | ||
630 | }; | ||
631 | |||
632 | static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
633 | { | ||
634 | double x, y; | ||
635 | |||
636 | x = fragmentation(l->bytes_req, l->bytes_alloc); | ||
637 | y = fragmentation(r->bytes_req, r->bytes_alloc); | ||
638 | |||
639 | if (x < y) | ||
640 | return -1; | ||
641 | else if (x > y) | ||
642 | return 1; | ||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | static struct sort_dimension frag_sort_dimension = { | ||
647 | .name = "frag", | ||
648 | .cmp = frag_cmp, | ||
649 | }; | ||
650 | |||
651 | static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
652 | { | ||
653 | if (l->pingpong < r->pingpong) | ||
654 | return -1; | ||
655 | else if (l->pingpong > r->pingpong) | ||
656 | return 1; | ||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | static struct sort_dimension pingpong_sort_dimension = { | ||
661 | .name = "pingpong", | ||
662 | .cmp = pingpong_cmp, | ||
663 | }; | ||
664 | |||
665 | static struct sort_dimension *avail_sorts[] = { | ||
666 | &ptr_sort_dimension, | ||
667 | &callsite_sort_dimension, | ||
668 | &hit_sort_dimension, | ||
669 | &bytes_sort_dimension, | ||
670 | &frag_sort_dimension, | ||
671 | &pingpong_sort_dimension, | ||
672 | }; | ||
673 | |||
674 | #define NUM_AVAIL_SORTS \ | ||
675 | (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) | ||
676 | |||
677 | static int sort_dimension__add(const char *tok, struct list_head *list) | ||
678 | { | ||
679 | struct sort_dimension *sort; | ||
680 | int i; | ||
681 | |||
682 | for (i = 0; i < NUM_AVAIL_SORTS; i++) { | ||
683 | if (!strcmp(avail_sorts[i]->name, tok)) { | ||
684 | sort = malloc(sizeof(*sort)); | ||
685 | if (!sort) | ||
686 | die("malloc"); | ||
687 | memcpy(sort, avail_sorts[i], sizeof(*sort)); | ||
688 | list_add_tail(&sort->list, list); | ||
689 | return 0; | ||
690 | } | ||
691 | } | ||
692 | |||
693 | return -1; | ||
694 | } | ||
695 | |||
696 | static int setup_sorting(struct list_head *sort_list, const char *arg) | ||
697 | { | ||
698 | char *tok; | ||
699 | char *str = strdup(arg); | ||
700 | |||
701 | if (!str) | ||
702 | die("strdup"); | ||
703 | |||
704 | while (true) { | ||
705 | tok = strsep(&str, ","); | ||
706 | if (!tok) | ||
707 | break; | ||
708 | if (sort_dimension__add(tok, sort_list) < 0) { | ||
709 | error("Unknown --sort key: '%s'", tok); | ||
710 | return -1; | ||
711 | } | ||
712 | } | ||
713 | |||
714 | free(str); | ||
715 | return 0; | ||
716 | } | ||
717 | |||
718 | static int parse_sort_opt(const struct option *opt __used, | ||
719 | const char *arg, int unset __used) | ||
720 | { | ||
721 | if (!arg) | ||
722 | return -1; | ||
723 | |||
724 | if (caller_flag > alloc_flag) | ||
725 | return setup_sorting(&caller_sort, arg); | ||
726 | else | ||
727 | return setup_sorting(&alloc_sort, arg); | ||
728 | |||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | static int parse_stat_opt(const struct option *opt __used, | ||
733 | const char *arg, int unset __used) | ||
734 | { | ||
735 | if (!arg) | ||
736 | return -1; | ||
737 | |||
738 | if (strcmp(arg, "alloc") == 0) | ||
739 | alloc_flag = (caller_flag + 1); | ||
740 | else if (strcmp(arg, "caller") == 0) | ||
741 | caller_flag = (alloc_flag + 1); | ||
742 | else | ||
743 | return -1; | ||
744 | return 0; | ||
745 | } | ||
746 | |||
747 | static int parse_line_opt(const struct option *opt __used, | ||
748 | const char *arg, int unset __used) | ||
749 | { | ||
750 | int lines; | ||
751 | |||
752 | if (!arg) | ||
753 | return -1; | ||
754 | |||
755 | lines = strtoul(arg, NULL, 10); | ||
756 | |||
757 | if (caller_flag > alloc_flag) | ||
758 | caller_lines = lines; | ||
759 | else | ||
760 | alloc_lines = lines; | ||
761 | |||
762 | return 0; | ||
763 | } | ||
764 | |||
765 | static const struct option kmem_options[] = { | ||
766 | OPT_STRING('i', "input", &input_name, "file", | ||
767 | "input file name"), | ||
768 | OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>", | ||
769 | "stat selector, Pass 'alloc' or 'caller'.", | ||
770 | parse_stat_opt), | ||
771 | OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", | ||
772 | "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", | ||
773 | parse_sort_opt), | ||
774 | OPT_CALLBACK('l', "line", NULL, "num", | ||
775 | "show n lins", | ||
776 | parse_line_opt), | ||
777 | OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), | ||
778 | OPT_END() | ||
779 | }; | ||
780 | |||
781 | static const char *record_args[] = { | ||
782 | "record", | ||
783 | "-a", | ||
784 | "-R", | ||
785 | "-M", | ||
786 | "-f", | ||
787 | "-c", "1", | ||
788 | "-e", "kmem:kmalloc", | ||
789 | "-e", "kmem:kmalloc_node", | ||
790 | "-e", "kmem:kfree", | ||
791 | "-e", "kmem:kmem_cache_alloc", | ||
792 | "-e", "kmem:kmem_cache_alloc_node", | ||
793 | "-e", "kmem:kmem_cache_free", | ||
794 | }; | ||
795 | |||
796 | static int __cmd_record(int argc, const char **argv) | ||
797 | { | ||
798 | unsigned int rec_argc, i, j; | ||
799 | const char **rec_argv; | ||
800 | |||
801 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | ||
802 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | ||
803 | |||
804 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | ||
805 | rec_argv[i] = strdup(record_args[i]); | ||
806 | |||
807 | for (j = 1; j < (unsigned int)argc; j++, i++) | ||
808 | rec_argv[i] = argv[j]; | ||
809 | |||
810 | return cmd_record(i, rec_argv, NULL); | ||
811 | } | ||
812 | |||
813 | int cmd_kmem(int argc, const char **argv, const char *prefix __used) | ||
814 | { | ||
815 | symbol__init(0); | ||
816 | |||
817 | argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); | ||
818 | |||
819 | if (argc && !strncmp(argv[0], "rec", 3)) | ||
820 | return __cmd_record(argc, argv); | ||
821 | else if (argc) | ||
822 | usage_with_options(kmem_usage, kmem_options); | ||
823 | |||
824 | if (list_empty(&caller_sort)) | ||
825 | setup_sorting(&caller_sort, default_sort_order); | ||
826 | if (list_empty(&alloc_sort)) | ||
827 | setup_sorting(&alloc_sort, default_sort_order); | ||
828 | |||
829 | setup_cpunode_map(); | ||
830 | |||
831 | return __cmd_kmem(); | ||
832 | } | ||
833 | |||
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 82260c56db3d..0e519c667e3a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -307,6 +307,12 @@ try_again: | |||
307 | printf("\n"); | 307 | printf("\n"); |
308 | error("perfcounter syscall returned with %d (%s)\n", | 308 | error("perfcounter syscall returned with %d (%s)\n", |
309 | fd[nr_cpu][counter], strerror(err)); | 309 | fd[nr_cpu][counter], strerror(err)); |
310 | |||
311 | #if defined(__i386__) || defined(__x86_64__) | ||
312 | if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) | ||
313 | die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); | ||
314 | #endif | ||
315 | |||
310 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | 316 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); |
311 | exit(-1); | 317 | exit(-1); |
312 | } | 318 | } |
@@ -400,7 +406,7 @@ static int __cmd_record(int argc, const char **argv) | |||
400 | struct stat st; | 406 | struct stat st; |
401 | pid_t pid = 0; | 407 | pid_t pid = 0; |
402 | int flags; | 408 | int flags; |
403 | int ret; | 409 | int err; |
404 | unsigned long waking = 0; | 410 | unsigned long waking = 0; |
405 | 411 | ||
406 | page_size = sysconf(_SC_PAGE_SIZE); | 412 | page_size = sysconf(_SC_PAGE_SIZE); |
@@ -434,16 +440,18 @@ static int __cmd_record(int argc, const char **argv) | |||
434 | exit(-1); | 440 | exit(-1); |
435 | } | 441 | } |
436 | 442 | ||
437 | if (!file_new) | 443 | header = perf_header__new(); |
438 | header = perf_header__read(output); | ||
439 | else | ||
440 | header = perf_header__new(); | ||
441 | |||
442 | if (header == NULL) { | 444 | if (header == NULL) { |
443 | pr_err("Not enough memory for reading perf file header\n"); | 445 | pr_err("Not enough memory for reading perf file header\n"); |
444 | return -1; | 446 | return -1; |
445 | } | 447 | } |
446 | 448 | ||
449 | if (!file_new) { | ||
450 | err = perf_header__read(header, output); | ||
451 | if (err < 0) | ||
452 | return err; | ||
453 | } | ||
454 | |||
447 | if (raw_samples) { | 455 | if (raw_samples) { |
448 | perf_header__set_feat(header, HEADER_TRACE_INFO); | 456 | perf_header__set_feat(header, HEADER_TRACE_INFO); |
449 | } else { | 457 | } else { |
@@ -472,8 +480,11 @@ static int __cmd_record(int argc, const char **argv) | |||
472 | } | 480 | } |
473 | } | 481 | } |
474 | 482 | ||
475 | if (file_new) | 483 | if (file_new) { |
476 | perf_header__write(header, output, false); | 484 | err = perf_header__write(header, output, false); |
485 | if (err < 0) | ||
486 | return err; | ||
487 | } | ||
477 | 488 | ||
478 | if (!system_wide) | 489 | if (!system_wide) |
479 | event__synthesize_thread(pid, process_synthesized_event); | 490 | event__synthesize_thread(pid, process_synthesized_event); |
@@ -527,7 +538,7 @@ static int __cmd_record(int argc, const char **argv) | |||
527 | if (hits == samples) { | 538 | if (hits == samples) { |
528 | if (done) | 539 | if (done) |
529 | break; | 540 | break; |
530 | ret = poll(event_array, nr_poll, -1); | 541 | err = poll(event_array, nr_poll, -1); |
531 | waking++; | 542 | waking++; |
532 | } | 543 | } |
533 | 544 | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1a806d5f05cf..fe474b7f8ad0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -38,6 +38,7 @@ static char *dso_list_str, *comm_list_str, *sym_list_str, | |||
38 | static struct strlist *dso_list, *comm_list, *sym_list; | 38 | static struct strlist *dso_list, *comm_list, *sym_list; |
39 | 39 | ||
40 | static int force; | 40 | static int force; |
41 | static bool use_modules; | ||
41 | 42 | ||
42 | static int full_paths; | 43 | static int full_paths; |
43 | static int show_nr_samples; | 44 | static int show_nr_samples; |
@@ -51,6 +52,7 @@ static char *pretty_printing_style = default_pretty_printing_style; | |||
51 | static int exclude_other = 1; | 52 | static int exclude_other = 1; |
52 | 53 | ||
53 | static char callchain_default_opt[] = "fractal,0.5"; | 54 | static char callchain_default_opt[] = "fractal,0.5"; |
55 | const char *vmlinux_name; | ||
54 | 56 | ||
55 | static char *cwd; | 57 | static char *cwd; |
56 | static int cwdlen; | 58 | static int cwdlen; |
@@ -448,7 +450,7 @@ got_map: | |||
448 | * trick of looking in the whole kernel symbol list. | 450 | * trick of looking in the whole kernel symbol list. |
449 | */ | 451 | */ |
450 | if ((long long)ip < 0) | 452 | if ((long long)ip < 0) |
451 | return kernel_maps__find_symbol(ip, mapp); | 453 | return kernel_maps__find_symbol(ip, mapp, NULL); |
452 | } | 454 | } |
453 | dump_printf(" ...... dso: %s\n", | 455 | dump_printf(" ...... dso: %s\n", |
454 | map ? map->dso->long_name : "<not found>"); | 456 | map ? map->dso->long_name : "<not found>"); |
@@ -466,7 +468,7 @@ static int call__match(struct symbol *sym) | |||
466 | return 0; | 468 | return 0; |
467 | } | 469 | } |
468 | 470 | ||
469 | static struct symbol **resolve_callchain(struct thread *thread, struct map *map, | 471 | static struct symbol **resolve_callchain(struct thread *thread, |
470 | struct ip_callchain *chain, | 472 | struct ip_callchain *chain, |
471 | struct symbol **parent) | 473 | struct symbol **parent) |
472 | { | 474 | { |
@@ -495,10 +497,10 @@ static struct symbol **resolve_callchain(struct thread *thread, struct map *map, | |||
495 | case PERF_CONTEXT_HV: | 497 | case PERF_CONTEXT_HV: |
496 | break; | 498 | break; |
497 | case PERF_CONTEXT_KERNEL: | 499 | case PERF_CONTEXT_KERNEL: |
498 | sym = kernel_maps__find_symbol(ip, &map); | 500 | sym = kernel_maps__find_symbol(ip, NULL, NULL); |
499 | break; | 501 | break; |
500 | default: | 502 | default: |
501 | sym = resolve_symbol(thread, &map, &ip); | 503 | sym = resolve_symbol(thread, NULL, &ip); |
502 | break; | 504 | break; |
503 | } | 505 | } |
504 | 506 | ||
@@ -528,7 +530,7 @@ hist_entry__add(struct thread *thread, struct map *map, | |||
528 | struct hist_entry *he; | 530 | struct hist_entry *he; |
529 | 531 | ||
530 | if ((sort__has_parent || callchain) && chain) | 532 | if ((sort__has_parent || callchain) && chain) |
531 | syms = resolve_callchain(thread, map, chain, &parent); | 533 | syms = resolve_callchain(thread, chain, &parent); |
532 | 534 | ||
533 | he = __hist_entry__add(thread, map, sym, parent, | 535 | he = __hist_entry__add(thread, map, sym, parent, |
534 | ip, count, level, &hit); | 536 | ip, count, level, &hit); |
@@ -715,7 +717,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) | |||
715 | 717 | ||
716 | if (cpumode == PERF_RECORD_MISC_KERNEL) { | 718 | if (cpumode == PERF_RECORD_MISC_KERNEL) { |
717 | level = 'k'; | 719 | level = 'k'; |
718 | sym = kernel_maps__find_symbol(ip, &map); | 720 | sym = kernel_maps__find_symbol(ip, &map, NULL); |
719 | dump_printf(" ...... dso: %s\n", | 721 | dump_printf(" ...... dso: %s\n", |
720 | map ? map->dso->long_name : "<not found>"); | 722 | map ? map->dso->long_name : "<not found>"); |
721 | } else if (cpumode == PERF_RECORD_MISC_USER) { | 723 | } else if (cpumode == PERF_RECORD_MISC_USER) { |
@@ -924,8 +926,9 @@ static int __cmd_report(void) | |||
924 | 926 | ||
925 | register_perf_file_handler(&file_handler); | 927 | register_perf_file_handler(&file_handler); |
926 | 928 | ||
927 | ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths, | 929 | ret = mmap_dispatch_perf_file(&header, input_name, vmlinux_name, |
928 | &cwdlen, &cwd); | 930 | !vmlinux_name, force, |
931 | full_paths, &cwdlen, &cwd); | ||
929 | if (ret) | 932 | if (ret) |
930 | return ret; | 933 | return ret; |
931 | 934 | ||
@@ -1023,7 +1026,7 @@ static const struct option options[] = { | |||
1023 | "dump raw trace in ASCII"), | 1026 | "dump raw trace in ASCII"), |
1024 | OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), | 1027 | OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), |
1025 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), | 1028 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), |
1026 | OPT_BOOLEAN('m', "modules", &modules, | 1029 | OPT_BOOLEAN('m', "modules", &use_modules, |
1027 | "load module symbols - WARNING: use only with -k and LIVE kernel"), | 1030 | "load module symbols - WARNING: use only with -k and LIVE kernel"), |
1028 | OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, | 1031 | OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, |
1029 | "Show a column with the number of samples"), | 1032 | "Show a column with the number of samples"), |
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index df44b756cecc..260f57a72ee0 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
@@ -1718,7 +1718,8 @@ static int read_events(void) | |||
1718 | register_idle_thread(); | 1718 | register_idle_thread(); |
1719 | register_perf_file_handler(&file_handler); | 1719 | register_perf_file_handler(&file_handler); |
1720 | 1720 | ||
1721 | return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); | 1721 | return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, |
1722 | &cwdlen, &cwd); | ||
1722 | } | 1723 | } |
1723 | 1724 | ||
1724 | static void print_bad_events(void) | 1725 | static void print_bad_events(void) |
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 665877e4a944..dd4d82ac7aa4 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c | |||
@@ -1093,7 +1093,7 @@ static void process_samples(void) | |||
1093 | 1093 | ||
1094 | static int __cmd_timechart(void) | 1094 | static int __cmd_timechart(void) |
1095 | { | 1095 | { |
1096 | int ret, rc = EXIT_FAILURE; | 1096 | int err, rc = EXIT_FAILURE; |
1097 | unsigned long offset = 0; | 1097 | unsigned long offset = 0; |
1098 | unsigned long head, shift; | 1098 | unsigned long head, shift; |
1099 | struct stat statbuf; | 1099 | struct stat statbuf; |
@@ -1111,8 +1111,8 @@ static int __cmd_timechart(void) | |||
1111 | exit(-1); | 1111 | exit(-1); |
1112 | } | 1112 | } |
1113 | 1113 | ||
1114 | ret = fstat(input, &statbuf); | 1114 | err = fstat(input, &statbuf); |
1115 | if (ret < 0) { | 1115 | if (err < 0) { |
1116 | perror("failed to stat file"); | 1116 | perror("failed to stat file"); |
1117 | exit(-1); | 1117 | exit(-1); |
1118 | } | 1118 | } |
@@ -1122,7 +1122,16 @@ static int __cmd_timechart(void) | |||
1122 | exit(0); | 1122 | exit(0); |
1123 | } | 1123 | } |
1124 | 1124 | ||
1125 | header = perf_header__read(input); | 1125 | header = perf_header__new(); |
1126 | if (header == NULL) | ||
1127 | return -ENOMEM; | ||
1128 | |||
1129 | err = perf_header__read(header, input); | ||
1130 | if (err < 0) { | ||
1131 | perf_header__delete(header); | ||
1132 | return err; | ||
1133 | } | ||
1134 | |||
1126 | head = header->data_offset; | 1135 | head = header->data_offset; |
1127 | 1136 | ||
1128 | sample_type = perf_header__sample_type(header); | 1137 | sample_type = perf_header__sample_type(header); |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 07b92c378ae2..6a5de90e9b83 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -79,13 +79,7 @@ static int dump_symtab = 0; | |||
79 | static bool hide_kernel_symbols = false; | 79 | static bool hide_kernel_symbols = false; |
80 | static bool hide_user_symbols = false; | 80 | static bool hide_user_symbols = false; |
81 | static struct winsize winsize; | 81 | static struct winsize winsize; |
82 | static const char *graph_line = | 82 | const char *vmlinux_name; |
83 | "_____________________________________________________________________" | ||
84 | "_____________________________________________________________________"; | ||
85 | static const char *graph_dotted_line = | ||
86 | "---------------------------------------------------------------------" | ||
87 | "---------------------------------------------------------------------" | ||
88 | "---------------------------------------------------------------------"; | ||
89 | 83 | ||
90 | /* | 84 | /* |
91 | * Source | 85 | * Source |
@@ -830,6 +824,8 @@ static void handle_keypress(int c) | |||
830 | case 'q': | 824 | case 'q': |
831 | case 'Q': | 825 | case 'Q': |
832 | printf("exiting.\n"); | 826 | printf("exiting.\n"); |
827 | if (dump_symtab) | ||
828 | dsos__fprintf(stderr); | ||
833 | exit(0); | 829 | exit(0); |
834 | case 's': | 830 | case 's': |
835 | prompt_symbol(&sym_filter_entry, "Enter details symbol"); | 831 | prompt_symbol(&sym_filter_entry, "Enter details symbol"); |
@@ -946,17 +942,6 @@ static int symbol_filter(struct map *map, struct symbol *sym) | |||
946 | return 0; | 942 | return 0; |
947 | } | 943 | } |
948 | 944 | ||
949 | static int parse_symbols(void) | ||
950 | { | ||
951 | if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0) | ||
952 | return -1; | ||
953 | |||
954 | if (dump_symtab) | ||
955 | dsos__fprintf(stderr); | ||
956 | |||
957 | return 0; | ||
958 | } | ||
959 | |||
960 | static void event__process_sample(const event_t *self, int counter) | 945 | static void event__process_sample(const event_t *self, int counter) |
961 | { | 946 | { |
962 | u64 ip = self->ip.ip; | 947 | u64 ip = self->ip.ip; |
@@ -999,7 +984,7 @@ static void event__process_sample(const event_t *self, int counter) | |||
999 | if (hide_kernel_symbols) | 984 | if (hide_kernel_symbols) |
1000 | return; | 985 | return; |
1001 | 986 | ||
1002 | sym = kernel_maps__find_symbol(ip, &map); | 987 | sym = kernel_maps__find_symbol(ip, &map, symbol_filter); |
1003 | if (sym == NULL) | 988 | if (sym == NULL) |
1004 | return; | 989 | return; |
1005 | break; | 990 | break; |
@@ -1326,7 +1311,7 @@ static const struct option options[] = { | |||
1326 | 1311 | ||
1327 | int cmd_top(int argc, const char **argv, const char *prefix __used) | 1312 | int cmd_top(int argc, const char **argv, const char *prefix __used) |
1328 | { | 1313 | { |
1329 | int counter; | 1314 | int counter, err; |
1330 | 1315 | ||
1331 | page_size = sysconf(_SC_PAGE_SIZE); | 1316 | page_size = sysconf(_SC_PAGE_SIZE); |
1332 | 1317 | ||
@@ -1350,10 +1335,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1350 | if (delay_secs < 1) | 1335 | if (delay_secs < 1) |
1351 | delay_secs = 1; | 1336 | delay_secs = 1; |
1352 | 1337 | ||
1353 | parse_symbols(); | 1338 | err = kernel_maps__init(vmlinux_name, !vmlinux_name, true); |
1339 | if (err < 0) | ||
1340 | return err; | ||
1354 | parse_source(sym_filter_entry); | 1341 | parse_source(sym_filter_entry); |
1355 | 1342 | ||
1356 | |||
1357 | /* | 1343 | /* |
1358 | * User specified count overrides default frequency. | 1344 | * User specified count overrides default frequency. |
1359 | */ | 1345 | */ |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d042d656c561..b71198e5dc14 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -131,7 +131,8 @@ static int __cmd_trace(void) | |||
131 | register_idle_thread(); | 131 | register_idle_thread(); |
132 | register_perf_file_handler(&file_handler); | 132 | register_perf_file_handler(&file_handler); |
133 | 133 | ||
134 | return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); | 134 | return mmap_dispatch_perf_file(&header, input_name, NULL, false, |
135 | 0, 0, &cwdlen, &cwd); | ||
135 | } | 136 | } |
136 | 137 | ||
137 | static const char * const annotate_usage[] = { | 138 | static const char * const annotate_usage[] = { |
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 9b02d85091fe..a3d8bf65f26c 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h | |||
@@ -28,5 +28,6 @@ extern int cmd_top(int argc, const char **argv, const char *prefix); | |||
28 | extern int cmd_trace(int argc, const char **argv, const char *prefix); | 28 | extern int cmd_trace(int argc, const char **argv, const char *prefix); |
29 | extern int cmd_version(int argc, const char **argv, const char *prefix); | 29 | extern int cmd_version(int argc, const char **argv, const char *prefix); |
30 | extern int cmd_probe(int argc, const char **argv, const char *prefix); | 30 | extern int cmd_probe(int argc, const char **argv, const char *prefix); |
31 | extern int cmd_kmem(int argc, const char **argv, const char *prefix); | ||
31 | 32 | ||
32 | #endif | 33 | #endif |
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index d3a6e18e4a5e..02b09ea17a3e 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt | |||
@@ -14,3 +14,4 @@ perf-timechart mainporcelain common | |||
14 | perf-top mainporcelain common | 14 | perf-top mainporcelain common |
15 | perf-trace mainporcelain common | 15 | perf-trace mainporcelain common |
16 | perf-probe mainporcelain common | 16 | perf-probe mainporcelain common |
17 | perf-kmem mainporcelain common | ||
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 89b82acac7d9..cf64049bc9bd 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -285,20 +285,21 @@ static void handle_internal_command(int argc, const char **argv) | |||
285 | { | 285 | { |
286 | const char *cmd = argv[0]; | 286 | const char *cmd = argv[0]; |
287 | static struct cmd_struct commands[] = { | 287 | static struct cmd_struct commands[] = { |
288 | { "help", cmd_help, 0 }, | ||
289 | { "list", cmd_list, 0 }, | ||
290 | { "buildid-list", cmd_buildid_list, 0 }, | 288 | { "buildid-list", cmd_buildid_list, 0 }, |
291 | { "record", cmd_record, 0 }, | 289 | { "help", cmd_help, 0 }, |
292 | { "report", cmd_report, 0 }, | 290 | { "list", cmd_list, 0 }, |
293 | { "bench", cmd_bench, 0 }, | 291 | { "record", cmd_record, 0 }, |
294 | { "stat", cmd_stat, 0 }, | 292 | { "report", cmd_report, 0 }, |
295 | { "timechart", cmd_timechart, 0 }, | 293 | { "bench", cmd_bench, 0 }, |
296 | { "top", cmd_top, 0 }, | 294 | { "stat", cmd_stat, 0 }, |
297 | { "annotate", cmd_annotate, 0 }, | 295 | { "timechart", cmd_timechart, 0 }, |
298 | { "version", cmd_version, 0 }, | 296 | { "top", cmd_top, 0 }, |
299 | { "trace", cmd_trace, 0 }, | 297 | { "annotate", cmd_annotate, 0 }, |
300 | { "sched", cmd_sched, 0 }, | 298 | { "version", cmd_version, 0 }, |
301 | { "probe", cmd_probe, 0 }, | 299 | { "trace", cmd_trace, 0 }, |
300 | { "sched", cmd_sched, 0 }, | ||
301 | { "probe", cmd_probe, 0 }, | ||
302 | { "kmem", cmd_kmem, 0 }, | ||
302 | }; | 303 | }; |
303 | unsigned int i; | 304 | unsigned int i; |
304 | static const char ext[] = STRIP_EXTENSION; | 305 | static const char ext[] = STRIP_EXTENSION; |
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 0b791bd346bc..35073621e5de 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c | |||
@@ -29,3 +29,11 @@ unsigned char sane_ctype[256] = { | |||
29 | A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ | 29 | A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ |
30 | /* Nothing in the 128.. range */ | 30 | /* Nothing in the 128.. range */ |
31 | }; | 31 | }; |
32 | |||
33 | const char *graph_line = | ||
34 | "_____________________________________________________________________" | ||
35 | "_____________________________________________________________________"; | ||
36 | const char *graph_dotted_line = | ||
37 | "---------------------------------------------------------------------" | ||
38 | "---------------------------------------------------------------------" | ||
39 | "---------------------------------------------------------------------"; | ||
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 14cb8465eb08..f318d19b2562 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c | |||
@@ -101,12 +101,14 @@ out: | |||
101 | 101 | ||
102 | int mmap_dispatch_perf_file(struct perf_header **pheader, | 102 | int mmap_dispatch_perf_file(struct perf_header **pheader, |
103 | const char *input_name, | 103 | const char *input_name, |
104 | const char *vmlinux_name, | ||
105 | bool try_vmlinux_path, | ||
104 | int force, | 106 | int force, |
105 | int full_paths, | 107 | int full_paths, |
106 | int *cwdlen, | 108 | int *cwdlen, |
107 | char **cwd) | 109 | char **cwd) |
108 | { | 110 | { |
109 | int ret, rc = EXIT_FAILURE; | 111 | int err; |
110 | struct perf_header *header; | 112 | struct perf_header *header; |
111 | unsigned long head, shift; | 113 | unsigned long head, shift; |
112 | unsigned long offset = 0; | 114 | unsigned long offset = 0; |
@@ -118,56 +120,69 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, | |||
118 | int input; | 120 | int input; |
119 | char *buf; | 121 | char *buf; |
120 | 122 | ||
121 | if (!curr_handler) | 123 | if (curr_handler == NULL) { |
122 | die("Forgot to register perf file handler"); | 124 | pr_debug("Forgot to register perf file handler\n"); |
125 | return -EINVAL; | ||
126 | } | ||
123 | 127 | ||
124 | page_size = getpagesize(); | 128 | page_size = getpagesize(); |
125 | 129 | ||
126 | input = open(input_name, O_RDONLY); | 130 | input = open(input_name, O_RDONLY); |
127 | if (input < 0) { | 131 | if (input < 0) { |
128 | fprintf(stderr, " failed to open file: %s", input_name); | 132 | pr_err("Failed to open file: %s", input_name); |
129 | if (!strcmp(input_name, "perf.data")) | 133 | if (!strcmp(input_name, "perf.data")) |
130 | fprintf(stderr, " (try 'perf record' first)"); | 134 | pr_err(" (try 'perf record' first)"); |
131 | fprintf(stderr, "\n"); | 135 | pr_err("\n"); |
132 | exit(-1); | 136 | return -errno; |
133 | } | 137 | } |
134 | 138 | ||
135 | ret = fstat(input, &input_stat); | 139 | if (fstat(input, &input_stat) < 0) { |
136 | if (ret < 0) { | 140 | pr_err("failed to stat file"); |
137 | perror("failed to stat file"); | 141 | err = -errno; |
138 | exit(-1); | 142 | goto out_close; |
139 | } | 143 | } |
140 | 144 | ||
145 | err = -EACCES; | ||
141 | if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { | 146 | if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { |
142 | fprintf(stderr, "file: %s not owned by current user or root\n", | 147 | pr_err("file: %s not owned by current user or root\n", |
143 | input_name); | 148 | input_name); |
144 | exit(-1); | 149 | goto out_close; |
145 | } | 150 | } |
146 | 151 | ||
147 | if (!input_stat.st_size) { | 152 | if (input_stat.st_size == 0) { |
148 | fprintf(stderr, "zero-sized file, nothing to do!\n"); | 153 | pr_info("zero-sized file, nothing to do!\n"); |
149 | exit(0); | 154 | goto done; |
150 | } | 155 | } |
151 | 156 | ||
152 | *pheader = perf_header__read(input); | 157 | err = -ENOMEM; |
153 | header = *pheader; | 158 | header = perf_header__new(); |
159 | if (header == NULL) | ||
160 | goto out_close; | ||
161 | |||
162 | err = perf_header__read(header, input); | ||
163 | if (err < 0) | ||
164 | goto out_delete; | ||
165 | *pheader = header; | ||
154 | head = header->data_offset; | 166 | head = header->data_offset; |
155 | 167 | ||
156 | sample_type = perf_header__sample_type(header); | 168 | sample_type = perf_header__sample_type(header); |
157 | 169 | ||
158 | if (curr_handler->sample_type_check) | 170 | err = -EINVAL; |
159 | if (curr_handler->sample_type_check(sample_type) < 0) | 171 | if (curr_handler->sample_type_check && |
160 | exit(-1); | 172 | curr_handler->sample_type_check(sample_type) < 0) |
173 | goto out_delete; | ||
161 | 174 | ||
162 | if (load_kernel(NULL) < 0) { | 175 | err = -ENOMEM; |
163 | perror("failed to load kernel symbols"); | 176 | if (kernel_maps__init(vmlinux_name, try_vmlinux_path, true) < 0) { |
164 | return EXIT_FAILURE; | 177 | pr_err("failed to setup the kernel maps to resolve symbols\n"); |
178 | goto out_delete; | ||
165 | } | 179 | } |
166 | 180 | ||
167 | if (!full_paths) { | 181 | if (!full_paths) { |
168 | if (getcwd(__cwd, sizeof(__cwd)) == NULL) { | 182 | if (getcwd(__cwd, sizeof(__cwd)) == NULL) { |
169 | perror("failed to get the current directory"); | 183 | pr_err("failed to get the current directory\n"); |
170 | return EXIT_FAILURE; | 184 | err = -errno; |
185 | goto out_delete; | ||
171 | } | 186 | } |
172 | *cwd = __cwd; | 187 | *cwd = __cwd; |
173 | *cwdlen = strlen(*cwd); | 188 | *cwdlen = strlen(*cwd); |
@@ -181,11 +196,12 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, | |||
181 | head -= shift; | 196 | head -= shift; |
182 | 197 | ||
183 | remap: | 198 | remap: |
184 | buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, | 199 | buf = mmap(NULL, page_size * mmap_window, PROT_READ, |
185 | MAP_SHARED, input, offset); | 200 | MAP_SHARED, input, offset); |
186 | if (buf == MAP_FAILED) { | 201 | if (buf == MAP_FAILED) { |
187 | perror("failed to mmap file"); | 202 | pr_err("failed to mmap file\n"); |
188 | exit(-1); | 203 | err = -errno; |
204 | goto out_delete; | ||
189 | } | 205 | } |
190 | 206 | ||
191 | more: | 207 | more: |
@@ -242,10 +258,12 @@ more: | |||
242 | goto more; | 258 | goto more; |
243 | 259 | ||
244 | done: | 260 | done: |
245 | rc = EXIT_SUCCESS; | 261 | err = 0; |
262 | out_close: | ||
246 | close(input); | 263 | close(input); |
247 | 264 | ||
248 | return rc; | 265 | return err; |
266 | out_delete: | ||
267 | perf_header__delete(header); | ||
268 | goto out_close; | ||
249 | } | 269 | } |
250 | |||
251 | |||
diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index ae036ecd7625..3f0d21b3819e 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h | |||
@@ -23,6 +23,8 @@ struct perf_file_handler { | |||
23 | void register_perf_file_handler(struct perf_file_handler *handler); | 23 | void register_perf_file_handler(struct perf_file_handler *handler); |
24 | int mmap_dispatch_perf_file(struct perf_header **pheader, | 24 | int mmap_dispatch_perf_file(struct perf_header **pheader, |
25 | const char *input_name, | 25 | const char *input_name, |
26 | const char *vmlinux_name, | ||
27 | bool try_vmlinux_path, | ||
26 | int force, | 28 | int force, |
27 | int full_paths, | 29 | int full_paths, |
28 | int *cwdlen, | 30 | int *cwdlen, |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1f771ce3a957..f1e392612652 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -69,13 +69,6 @@ struct build_id_event { | |||
69 | char filename[]; | 69 | char filename[]; |
70 | }; | 70 | }; |
71 | 71 | ||
72 | struct build_id_list { | ||
73 | struct build_id_event event; | ||
74 | struct list_head list; | ||
75 | const char *dso_name; | ||
76 | int len; | ||
77 | }; | ||
78 | |||
79 | typedef union event_union { | 72 | typedef union event_union { |
80 | struct perf_event_header header; | 73 | struct perf_event_header header; |
81 | struct ip_event ip; | 74 | struct ip_event ip; |
@@ -122,10 +115,13 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); | |||
122 | void map__init(struct map *self, u64 start, u64 end, u64 pgoff, | 115 | void map__init(struct map *self, u64 start, u64 end, u64 pgoff, |
123 | struct dso *dso); | 116 | struct dso *dso); |
124 | struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); | 117 | struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); |
118 | void map__delete(struct map *self); | ||
125 | struct map *map__clone(struct map *self); | 119 | struct map *map__clone(struct map *self); |
126 | int map__overlap(struct map *l, struct map *r); | 120 | int map__overlap(struct map *l, struct map *r); |
127 | size_t map__fprintf(struct map *self, FILE *fp); | 121 | size_t map__fprintf(struct map *self, FILE *fp); |
128 | struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter); | 122 | struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter); |
123 | void map__fixup_start(struct map *self); | ||
124 | void map__fixup_end(struct map *self); | ||
129 | 125 | ||
130 | int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); | 126 | int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); |
131 | void event__synthesize_threads(int (*process)(event_t *event)); | 127 | void event__synthesize_threads(int (*process)(event_t *event)); |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b01a9537977f..1332f8ec04aa 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -78,16 +78,24 @@ struct perf_header *perf_header__new(void) | |||
78 | return self; | 78 | return self; |
79 | } | 79 | } |
80 | 80 | ||
81 | void perf_header__delete(struct perf_header *self) | ||
82 | { | ||
83 | int i; | ||
84 | |||
85 | for (i = 0; i < self->attrs; ++i) | ||
86 | perf_header_attr__delete(self->attr[i]); | ||
87 | |||
88 | free(self->attr); | ||
89 | free(self); | ||
90 | } | ||
91 | |||
81 | int perf_header__add_attr(struct perf_header *self, | 92 | int perf_header__add_attr(struct perf_header *self, |
82 | struct perf_header_attr *attr) | 93 | struct perf_header_attr *attr) |
83 | { | 94 | { |
84 | int pos = self->attrs; | ||
85 | |||
86 | if (self->frozen) | 95 | if (self->frozen) |
87 | return -1; | 96 | return -1; |
88 | 97 | ||
89 | self->attrs++; | 98 | if (self->attrs == self->size) { |
90 | if (self->attrs > self->size) { | ||
91 | int nsize = self->size * 2; | 99 | int nsize = self->size * 2; |
92 | struct perf_header_attr **nattr; | 100 | struct perf_header_attr **nattr; |
93 | 101 | ||
@@ -98,7 +106,8 @@ int perf_header__add_attr(struct perf_header *self, | |||
98 | self->size = nsize; | 106 | self->size = nsize; |
99 | self->attr = nattr; | 107 | self->attr = nattr; |
100 | } | 108 | } |
101 | self->attr[pos] = attr; | 109 | |
110 | self->attr[self->attrs++] = attr; | ||
102 | return 0; | 111 | return 0; |
103 | } | 112 | } |
104 | 113 | ||
@@ -167,7 +176,7 @@ static int do_write(int fd, const void *buf, size_t size) | |||
167 | int ret = write(fd, buf, size); | 176 | int ret = write(fd, buf, size); |
168 | 177 | ||
169 | if (ret < 0) | 178 | if (ret < 0) |
170 | return -1; | 179 | return -errno; |
171 | 180 | ||
172 | size -= ret; | 181 | size -= ret; |
173 | buf += ret; | 182 | buf += ret; |
@@ -176,43 +185,51 @@ static int do_write(int fd, const void *buf, size_t size) | |||
176 | return 0; | 185 | return 0; |
177 | } | 186 | } |
178 | 187 | ||
179 | static int write_buildid_table(int fd, struct list_head *id_head) | 188 | static int dsos__write_buildid_table(int fd) |
180 | { | 189 | { |
181 | struct build_id_list *iter, *next; | 190 | struct dso *pos; |
182 | 191 | ||
183 | list_for_each_entry_safe(iter, next, id_head, list) { | 192 | list_for_each_entry(pos, &dsos, node) { |
184 | struct build_id_event *b = &iter->event; | 193 | int err; |
185 | 194 | struct build_id_event b; | |
186 | if (do_write(fd, b, sizeof(*b)) < 0 || | 195 | size_t len; |
187 | do_write(fd, iter->dso_name, iter->len) < 0) | 196 | |
188 | return -1; | 197 | if (!pos->has_build_id) |
189 | list_del(&iter->list); | 198 | continue; |
190 | free(iter); | 199 | len = pos->long_name_len + 1; |
200 | len = ALIGN(len, 64); | ||
201 | memset(&b, 0, sizeof(b)); | ||
202 | memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); | ||
203 | b.header.size = sizeof(b) + len; | ||
204 | err = do_write(fd, &b, sizeof(b)); | ||
205 | if (err < 0) | ||
206 | return err; | ||
207 | err = do_write(fd, pos->long_name, len); | ||
208 | if (err < 0) | ||
209 | return err; | ||
191 | } | 210 | } |
192 | 211 | ||
193 | return 0; | 212 | return 0; |
194 | } | 213 | } |
195 | 214 | ||
196 | static void | 215 | static int perf_header__adds_write(struct perf_header *self, int fd) |
197 | perf_header__adds_write(struct perf_header *self, int fd) | ||
198 | { | 216 | { |
199 | LIST_HEAD(id_list); | ||
200 | int nr_sections; | 217 | int nr_sections; |
201 | struct perf_file_section *feat_sec; | 218 | struct perf_file_section *feat_sec; |
202 | int sec_size; | 219 | int sec_size; |
203 | u64 sec_start; | 220 | u64 sec_start; |
204 | int idx = 0; | 221 | int idx = 0, err; |
205 | 222 | ||
206 | if (fetch_build_id_table(&id_list)) | 223 | if (dsos__read_build_ids()) |
207 | perf_header__set_feat(self, HEADER_BUILD_ID); | 224 | perf_header__set_feat(self, HEADER_BUILD_ID); |
208 | 225 | ||
209 | nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); | 226 | nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); |
210 | if (!nr_sections) | 227 | if (!nr_sections) |
211 | return; | 228 | return 0; |
212 | 229 | ||
213 | feat_sec = calloc(sizeof(*feat_sec), nr_sections); | 230 | feat_sec = calloc(sizeof(*feat_sec), nr_sections); |
214 | if (!feat_sec) | 231 | if (feat_sec == NULL) |
215 | die("No memory"); | 232 | return -ENOMEM; |
216 | 233 | ||
217 | sec_size = sizeof(*feat_sec) * nr_sections; | 234 | sec_size = sizeof(*feat_sec) * nr_sections; |
218 | 235 | ||
@@ -236,25 +253,37 @@ perf_header__adds_write(struct perf_header *self, int fd) | |||
236 | 253 | ||
237 | buildid_sec = &feat_sec[idx++]; | 254 | buildid_sec = &feat_sec[idx++]; |
238 | 255 | ||
256 | /* | ||
257 | * Read the kernel buildid and the list of loaded modules with | ||
258 | * its build_ids: | ||
259 | */ | ||
260 | kernel_maps__init(NULL, false, true); | ||
261 | |||
239 | /* Write build-ids */ | 262 | /* Write build-ids */ |
240 | buildid_sec->offset = lseek(fd, 0, SEEK_CUR); | 263 | buildid_sec->offset = lseek(fd, 0, SEEK_CUR); |
241 | if (write_buildid_table(fd, &id_list) < 0) | 264 | err = dsos__write_buildid_table(fd); |
242 | die("failed to write buildid table"); | 265 | if (err < 0) { |
266 | pr_debug("failed to write buildid table\n"); | ||
267 | goto out_free; | ||
268 | } | ||
243 | buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; | 269 | buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; |
244 | } | 270 | } |
245 | 271 | ||
246 | lseek(fd, sec_start, SEEK_SET); | 272 | lseek(fd, sec_start, SEEK_SET); |
247 | if (do_write(fd, feat_sec, sec_size) < 0) | 273 | err = do_write(fd, feat_sec, sec_size); |
248 | die("failed to write feature section"); | 274 | if (err < 0) |
275 | pr_debug("failed to write feature section\n"); | ||
276 | out_free: | ||
249 | free(feat_sec); | 277 | free(feat_sec); |
278 | return err; | ||
250 | } | 279 | } |
251 | 280 | ||
252 | void perf_header__write(struct perf_header *self, int fd, bool at_exit) | 281 | int perf_header__write(struct perf_header *self, int fd, bool at_exit) |
253 | { | 282 | { |
254 | struct perf_file_header f_header; | 283 | struct perf_file_header f_header; |
255 | struct perf_file_attr f_attr; | 284 | struct perf_file_attr f_attr; |
256 | struct perf_header_attr *attr; | 285 | struct perf_header_attr *attr; |
257 | int i; | 286 | int i, err; |
258 | 287 | ||
259 | lseek(fd, sizeof(f_header), SEEK_SET); | 288 | lseek(fd, sizeof(f_header), SEEK_SET); |
260 | 289 | ||
@@ -263,8 +292,11 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) | |||
263 | attr = self->attr[i]; | 292 | attr = self->attr[i]; |
264 | 293 | ||
265 | attr->id_offset = lseek(fd, 0, SEEK_CUR); | 294 | attr->id_offset = lseek(fd, 0, SEEK_CUR); |
266 | if (do_write(fd, attr->id, attr->ids * sizeof(u64)) < 0) | 295 | err = do_write(fd, attr->id, attr->ids * sizeof(u64)); |
267 | die("failed to write perf header"); | 296 | if (err < 0) { |
297 | pr_debug("failed to write perf header\n"); | ||
298 | return err; | ||
299 | } | ||
268 | } | 300 | } |
269 | 301 | ||
270 | 302 | ||
@@ -280,20 +312,30 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) | |||
280 | .size = attr->ids * sizeof(u64), | 312 | .size = attr->ids * sizeof(u64), |
281 | } | 313 | } |
282 | }; | 314 | }; |
283 | if (do_write(fd, &f_attr, sizeof(f_attr)) < 0) | 315 | err = do_write(fd, &f_attr, sizeof(f_attr)); |
284 | die("failed to write perf header attribute"); | 316 | if (err < 0) { |
317 | pr_debug("failed to write perf header attribute\n"); | ||
318 | return err; | ||
319 | } | ||
285 | } | 320 | } |
286 | 321 | ||
287 | self->event_offset = lseek(fd, 0, SEEK_CUR); | 322 | self->event_offset = lseek(fd, 0, SEEK_CUR); |
288 | self->event_size = event_count * sizeof(struct perf_trace_event_type); | 323 | self->event_size = event_count * sizeof(struct perf_trace_event_type); |
289 | if (events) | 324 | if (events) { |
290 | if (do_write(fd, events, self->event_size) < 0) | 325 | err = do_write(fd, events, self->event_size); |
291 | die("failed to write perf header events"); | 326 | if (err < 0) { |
327 | pr_debug("failed to write perf header events\n"); | ||
328 | return err; | ||
329 | } | ||
330 | } | ||
292 | 331 | ||
293 | self->data_offset = lseek(fd, 0, SEEK_CUR); | 332 | self->data_offset = lseek(fd, 0, SEEK_CUR); |
294 | 333 | ||
295 | if (at_exit) | 334 | if (at_exit) { |
296 | perf_header__adds_write(self, fd); | 335 | err = perf_header__adds_write(self, fd); |
336 | if (err < 0) | ||
337 | return err; | ||
338 | } | ||
297 | 339 | ||
298 | f_header = (struct perf_file_header){ | 340 | f_header = (struct perf_file_header){ |
299 | .magic = PERF_MAGIC, | 341 | .magic = PERF_MAGIC, |
@@ -316,11 +358,15 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) | |||
316 | memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); | 358 | memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); |
317 | 359 | ||
318 | lseek(fd, 0, SEEK_SET); | 360 | lseek(fd, 0, SEEK_SET); |
319 | if (do_write(fd, &f_header, sizeof(f_header)) < 0) | 361 | err = do_write(fd, &f_header, sizeof(f_header)); |
320 | die("failed to write perf header"); | 362 | if (err < 0) { |
363 | pr_debug("failed to write perf header\n"); | ||
364 | return err; | ||
365 | } | ||
321 | lseek(fd, self->data_offset + self->data_size, SEEK_SET); | 366 | lseek(fd, self->data_offset + self->data_size, SEEK_SET); |
322 | 367 | ||
323 | self->frozen = 1; | 368 | self->frozen = 1; |
369 | return 0; | ||
324 | } | 370 | } |
325 | 371 | ||
326 | static void do_read(int fd, void *buf, size_t size) | 372 | static void do_read(int fd, void *buf, size_t size) |
@@ -430,19 +476,17 @@ static int perf_file_section__process(struct perf_file_section *self, | |||
430 | return 0; | 476 | return 0; |
431 | } | 477 | } |
432 | 478 | ||
433 | struct perf_header *perf_header__read(int fd) | 479 | int perf_header__read(struct perf_header *self, int fd) |
434 | { | 480 | { |
435 | struct perf_header *self = perf_header__new(); | ||
436 | struct perf_file_header f_header; | 481 | struct perf_file_header f_header; |
437 | struct perf_file_attr f_attr; | 482 | struct perf_file_attr f_attr; |
438 | u64 f_id; | 483 | u64 f_id; |
439 | int nr_attrs, nr_ids, i, j; | 484 | int nr_attrs, nr_ids, i, j; |
440 | 485 | ||
441 | if (self == NULL) | 486 | if (perf_file_header__read(&f_header, self, fd) < 0) { |
442 | die("nomem"); | 487 | pr_debug("incompatible file format\n"); |
443 | 488 | return -EINVAL; | |
444 | if (perf_file_header__read(&f_header, self, fd) < 0) | 489 | } |
445 | die("incompatible file format"); | ||
446 | 490 | ||
447 | nr_attrs = f_header.attrs.size / sizeof(f_attr); | 491 | nr_attrs = f_header.attrs.size / sizeof(f_attr); |
448 | lseek(fd, f_header.attrs.offset, SEEK_SET); | 492 | lseek(fd, f_header.attrs.offset, SEEK_SET); |
@@ -456,7 +500,7 @@ struct perf_header *perf_header__read(int fd) | |||
456 | 500 | ||
457 | attr = perf_header_attr__new(&f_attr.attr); | 501 | attr = perf_header_attr__new(&f_attr.attr); |
458 | if (attr == NULL) | 502 | if (attr == NULL) |
459 | die("nomem"); | 503 | return -ENOMEM; |
460 | 504 | ||
461 | nr_ids = f_attr.ids.size / sizeof(u64); | 505 | nr_ids = f_attr.ids.size / sizeof(u64); |
462 | lseek(fd, f_attr.ids.offset, SEEK_SET); | 506 | lseek(fd, f_attr.ids.offset, SEEK_SET); |
@@ -464,11 +508,15 @@ struct perf_header *perf_header__read(int fd) | |||
464 | for (j = 0; j < nr_ids; j++) { | 508 | for (j = 0; j < nr_ids; j++) { |
465 | do_read(fd, &f_id, sizeof(f_id)); | 509 | do_read(fd, &f_id, sizeof(f_id)); |
466 | 510 | ||
467 | if (perf_header_attr__add_id(attr, f_id) < 0) | 511 | if (perf_header_attr__add_id(attr, f_id) < 0) { |
468 | die("nomem"); | 512 | perf_header_attr__delete(attr); |
513 | return -ENOMEM; | ||
514 | } | ||
515 | } | ||
516 | if (perf_header__add_attr(self, attr) < 0) { | ||
517 | perf_header_attr__delete(attr); | ||
518 | return -ENOMEM; | ||
469 | } | 519 | } |
470 | if (perf_header__add_attr(self, attr) < 0) | ||
471 | die("nomem"); | ||
472 | 520 | ||
473 | lseek(fd, tmp, SEEK_SET); | 521 | lseek(fd, tmp, SEEK_SET); |
474 | } | 522 | } |
@@ -476,8 +524,8 @@ struct perf_header *perf_header__read(int fd) | |||
476 | if (f_header.event_types.size) { | 524 | if (f_header.event_types.size) { |
477 | lseek(fd, f_header.event_types.offset, SEEK_SET); | 525 | lseek(fd, f_header.event_types.offset, SEEK_SET); |
478 | events = malloc(f_header.event_types.size); | 526 | events = malloc(f_header.event_types.size); |
479 | if (!events) | 527 | if (events == NULL) |
480 | die("nomem"); | 528 | return -ENOMEM; |
481 | do_read(fd, events, f_header.event_types.size); | 529 | do_read(fd, events, f_header.event_types.size); |
482 | event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); | 530 | event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); |
483 | } | 531 | } |
@@ -487,8 +535,7 @@ struct perf_header *perf_header__read(int fd) | |||
487 | lseek(fd, self->data_offset, SEEK_SET); | 535 | lseek(fd, self->data_offset, SEEK_SET); |
488 | 536 | ||
489 | self->frozen = 1; | 537 | self->frozen = 1; |
490 | 538 | return 0; | |
491 | return self; | ||
492 | } | 539 | } |
493 | 540 | ||
494 | u64 perf_header__sample_type(struct perf_header *header) | 541 | u64 perf_header__sample_type(struct perf_header *header) |
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f46a94e09eea..d1dbe2b79c42 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
@@ -55,8 +55,11 @@ struct perf_header { | |||
55 | DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); | 55 | DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); |
56 | }; | 56 | }; |
57 | 57 | ||
58 | struct perf_header *perf_header__read(int fd); | 58 | struct perf_header *perf_header__new(void); |
59 | void perf_header__write(struct perf_header *self, int fd, bool at_exit); | 59 | void perf_header__delete(struct perf_header *self); |
60 | |||
61 | int perf_header__read(struct perf_header *self, int fd); | ||
62 | int perf_header__write(struct perf_header *self, int fd, bool at_exit); | ||
60 | 63 | ||
61 | int perf_header__add_attr(struct perf_header *self, | 64 | int perf_header__add_attr(struct perf_header *self, |
62 | struct perf_header_attr *attr); | 65 | struct perf_header_attr *attr); |
@@ -75,8 +78,6 @@ perf_header__find_attr(u64 id, struct perf_header *header); | |||
75 | void perf_header__set_feat(struct perf_header *self, int feat); | 78 | void perf_header__set_feat(struct perf_header *self, int feat); |
76 | bool perf_header__has_feat(const struct perf_header *self, int feat); | 79 | bool perf_header__has_feat(const struct perf_header *self, int feat); |
77 | 80 | ||
78 | struct perf_header *perf_header__new(void); | ||
79 | |||
80 | int perf_header__process_sections(struct perf_header *self, int fd, | 81 | int perf_header__process_sections(struct perf_header *self, int fd, |
81 | int (*process)(struct perf_file_section *self, | 82 | int (*process)(struct perf_file_section *self, |
82 | int feat, int fd)); | 83 | int feat, int fd)); |
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index ace57c36d1d0..8d63116e9435 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h | |||
@@ -7,6 +7,8 @@ | |||
7 | #define CONFIG_GENERIC_FIND_FIRST_BIT | 7 | #define CONFIG_GENERIC_FIND_FIRST_BIT |
8 | #include "../../../../include/linux/bitops.h" | 8 | #include "../../../../include/linux/bitops.h" |
9 | 9 | ||
10 | #undef __KERNEL__ | ||
11 | |||
10 | static inline void set_bit(int nr, unsigned long *addr) | 12 | static inline void set_bit(int nr, unsigned long *addr) |
11 | { | 13 | { |
12 | addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); | 14 | addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 94ca95073c40..09412321a80d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
@@ -75,6 +75,29 @@ out_delete: | |||
75 | return NULL; | 75 | return NULL; |
76 | } | 76 | } |
77 | 77 | ||
78 | void map__delete(struct map *self) | ||
79 | { | ||
80 | free(self); | ||
81 | } | ||
82 | |||
83 | void map__fixup_start(struct map *self) | ||
84 | { | ||
85 | struct rb_node *nd = rb_first(&self->dso->syms); | ||
86 | if (nd != NULL) { | ||
87 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | ||
88 | self->start = sym->start; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | void map__fixup_end(struct map *self) | ||
93 | { | ||
94 | struct rb_node *nd = rb_last(&self->dso->syms); | ||
95 | if (nd != NULL) { | ||
96 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | ||
97 | self->end = sym->end; | ||
98 | } | ||
99 | } | ||
100 | |||
78 | #define DSO__DELETED "(deleted)" | 101 | #define DSO__DELETED "(deleted)" |
79 | 102 | ||
80 | struct symbol * | 103 | struct symbol * |
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 0faf4f2bb5ca..070027469270 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -1,4 +1,4 @@ | |||
1 | 1 | #include "../../../include/linux/hw_breakpoint.h" | |
2 | #include "util.h" | 2 | #include "util.h" |
3 | #include "../perf.h" | 3 | #include "../perf.h" |
4 | #include "parse-options.h" | 4 | #include "parse-options.h" |
@@ -540,6 +540,81 @@ static enum event_result parse_tracepoint_event(const char **strp, | |||
540 | attr, strp); | 540 | attr, strp); |
541 | } | 541 | } |
542 | 542 | ||
543 | static enum event_result | ||
544 | parse_breakpoint_type(const char *type, const char **strp, | ||
545 | struct perf_event_attr *attr) | ||
546 | { | ||
547 | int i; | ||
548 | |||
549 | for (i = 0; i < 3; i++) { | ||
550 | if (!type[i]) | ||
551 | break; | ||
552 | |||
553 | switch (type[i]) { | ||
554 | case 'r': | ||
555 | attr->bp_type |= HW_BREAKPOINT_R; | ||
556 | break; | ||
557 | case 'w': | ||
558 | attr->bp_type |= HW_BREAKPOINT_W; | ||
559 | break; | ||
560 | case 'x': | ||
561 | attr->bp_type |= HW_BREAKPOINT_X; | ||
562 | break; | ||
563 | default: | ||
564 | return EVT_FAILED; | ||
565 | } | ||
566 | } | ||
567 | if (!attr->bp_type) /* Default */ | ||
568 | attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; | ||
569 | |||
570 | *strp = type + i; | ||
571 | |||
572 | return EVT_HANDLED; | ||
573 | } | ||
574 | |||
575 | static enum event_result | ||
576 | parse_breakpoint_event(const char **strp, struct perf_event_attr *attr) | ||
577 | { | ||
578 | const char *target; | ||
579 | const char *type; | ||
580 | char *endaddr; | ||
581 | u64 addr; | ||
582 | enum event_result err; | ||
583 | |||
584 | target = strchr(*strp, ':'); | ||
585 | if (!target) | ||
586 | return EVT_FAILED; | ||
587 | |||
588 | if (strncmp(*strp, "mem", target - *strp) != 0) | ||
589 | return EVT_FAILED; | ||
590 | |||
591 | target++; | ||
592 | |||
593 | addr = strtoull(target, &endaddr, 0); | ||
594 | if (target == endaddr) | ||
595 | return EVT_FAILED; | ||
596 | |||
597 | attr->bp_addr = addr; | ||
598 | *strp = endaddr; | ||
599 | |||
600 | type = strchr(target, ':'); | ||
601 | |||
602 | /* If no type is defined, just rw as default */ | ||
603 | if (!type) { | ||
604 | attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; | ||
605 | } else { | ||
606 | err = parse_breakpoint_type(++type, strp, attr); | ||
607 | if (err == EVT_FAILED) | ||
608 | return EVT_FAILED; | ||
609 | } | ||
610 | |||
611 | /* We should find a nice way to override the access type */ | ||
612 | attr->bp_len = HW_BREAKPOINT_LEN_4; | ||
613 | attr->type = PERF_TYPE_BREAKPOINT; | ||
614 | |||
615 | return EVT_HANDLED; | ||
616 | } | ||
617 | |||
543 | static int check_events(const char *str, unsigned int i) | 618 | static int check_events(const char *str, unsigned int i) |
544 | { | 619 | { |
545 | int n; | 620 | int n; |
@@ -673,6 +748,10 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr) | |||
673 | if (ret != EVT_FAILED) | 748 | if (ret != EVT_FAILED) |
674 | goto modifier; | 749 | goto modifier; |
675 | 750 | ||
751 | ret = parse_breakpoint_event(str, attr); | ||
752 | if (ret != EVT_FAILED) | ||
753 | goto modifier; | ||
754 | |||
676 | fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); | 755 | fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); |
677 | fprintf(stderr, "Run 'perf list' for a list of valid events\n"); | 756 | fprintf(stderr, "Run 'perf list' for a list of valid events\n"); |
678 | return EVT_FAILED; | 757 | return EVT_FAILED; |
@@ -859,6 +938,9 @@ void print_events(void) | |||
859 | "rNNN"); | 938 | "rNNN"); |
860 | printf("\n"); | 939 | printf("\n"); |
861 | 940 | ||
941 | printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]"); | ||
942 | printf("\n"); | ||
943 | |||
862 | print_tracepoint_events(); | 944 | print_tracepoint_events(); |
863 | 945 | ||
864 | exit(129); | 946 | exit(129); |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5cc96c86861b..44d81d5ae8cf 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -9,8 +9,13 @@ | |||
9 | #include <libelf.h> | 9 | #include <libelf.h> |
10 | #include <gelf.h> | 10 | #include <gelf.h> |
11 | #include <elf.h> | 11 | #include <elf.h> |
12 | #include <limits.h> | ||
12 | #include <sys/utsname.h> | 13 | #include <sys/utsname.h> |
13 | 14 | ||
15 | #ifndef NT_GNU_BUILD_ID | ||
16 | #define NT_GNU_BUILD_ID 3 | ||
17 | #endif | ||
18 | |||
14 | enum dso_origin { | 19 | enum dso_origin { |
15 | DSO__ORIG_KERNEL = 0, | 20 | DSO__ORIG_KERNEL = 0, |
16 | DSO__ORIG_JAVA_JIT, | 21 | DSO__ORIG_JAVA_JIT, |
@@ -26,7 +31,11 @@ static void dsos__add(struct dso *dso); | |||
26 | static struct dso *dsos__find(const char *name); | 31 | static struct dso *dsos__find(const char *name); |
27 | static struct map *map__new2(u64 start, struct dso *dso); | 32 | static struct map *map__new2(u64 start, struct dso *dso); |
28 | static void kernel_maps__insert(struct map *map); | 33 | static void kernel_maps__insert(struct map *map); |
34 | static int dso__load_kernel_sym(struct dso *self, struct map *map, | ||
35 | symbol_filter_t filter); | ||
29 | unsigned int symbol__priv_size; | 36 | unsigned int symbol__priv_size; |
37 | static int vmlinux_path__nr_entries; | ||
38 | static char **vmlinux_path; | ||
30 | 39 | ||
31 | static struct rb_root kernel_maps; | 40 | static struct rb_root kernel_maps; |
32 | 41 | ||
@@ -69,11 +78,11 @@ static void kernel_maps__fixup_end(void) | |||
69 | prev->end = curr->start - 1; | 78 | prev->end = curr->start - 1; |
70 | } | 79 | } |
71 | 80 | ||
72 | nd = rb_last(&curr->dso->syms); | 81 | /* |
73 | if (nd) { | 82 | * We still don't have the actual symbols, so guess the |
74 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | 83 | * last map final address. |
75 | curr->end = sym->end; | 84 | */ |
76 | } | 85 | curr->end = ~0UL; |
77 | } | 86 | } |
78 | 87 | ||
79 | static struct symbol *symbol__new(u64 start, u64 len, const char *name) | 88 | static struct symbol *symbol__new(u64 start, u64 len, const char *name) |
@@ -111,6 +120,8 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp) | |||
111 | 120 | ||
112 | static void dso__set_long_name(struct dso *self, char *name) | 121 | static void dso__set_long_name(struct dso *self, char *name) |
113 | { | 122 | { |
123 | if (name == NULL) | ||
124 | return; | ||
114 | self->long_name = name; | 125 | self->long_name = name; |
115 | self->long_name_len = strlen(name); | 126 | self->long_name_len = strlen(name); |
116 | } | 127 | } |
@@ -323,7 +334,7 @@ out_failure: | |||
323 | * kernel range is broken in several maps, named [kernel].N, as we don't have | 334 | * kernel range is broken in several maps, named [kernel].N, as we don't have |
324 | * the original ELF section names vmlinux have. | 335 | * the original ELF section names vmlinux have. |
325 | */ | 336 | */ |
326 | static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) | 337 | static int kernel_maps__split_kallsyms(symbol_filter_t filter) |
327 | { | 338 | { |
328 | struct map *map = kernel_map; | 339 | struct map *map = kernel_map; |
329 | struct symbol *pos; | 340 | struct symbol *pos; |
@@ -339,9 +350,6 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) | |||
339 | 350 | ||
340 | module = strchr(pos->name, '\t'); | 351 | module = strchr(pos->name, '\t'); |
341 | if (module) { | 352 | if (module) { |
342 | if (!use_modules) | ||
343 | goto delete_symbol; | ||
344 | |||
345 | *module++ = '\0'; | 353 | *module++ = '\0'; |
346 | 354 | ||
347 | if (strcmp(map->dso->name, module)) { | 355 | if (strcmp(map->dso->name, module)) { |
@@ -381,7 +389,6 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) | |||
381 | } | 389 | } |
382 | 390 | ||
383 | if (filter && filter(map, pos)) { | 391 | if (filter && filter(map, pos)) { |
384 | delete_symbol: | ||
385 | rb_erase(&pos->rb_node, &kernel_map->dso->syms); | 392 | rb_erase(&pos->rb_node, &kernel_map->dso->syms); |
386 | symbol__delete(pos); | 393 | symbol__delete(pos); |
387 | } else { | 394 | } else { |
@@ -397,17 +404,18 @@ delete_symbol: | |||
397 | } | 404 | } |
398 | 405 | ||
399 | 406 | ||
400 | static int kernel_maps__load_kallsyms(symbol_filter_t filter, int use_modules) | 407 | static int kernel_maps__load_kallsyms(symbol_filter_t filter) |
401 | { | 408 | { |
402 | if (kernel_maps__load_all_kallsyms()) | 409 | if (kernel_maps__load_all_kallsyms()) |
403 | return -1; | 410 | return -1; |
404 | 411 | ||
405 | dso__fixup_sym_end(kernel_map->dso); | 412 | dso__fixup_sym_end(kernel_map->dso); |
413 | kernel_map->dso->origin = DSO__ORIG_KERNEL; | ||
406 | 414 | ||
407 | return kernel_maps__split_kallsyms(filter, use_modules); | 415 | return kernel_maps__split_kallsyms(filter); |
408 | } | 416 | } |
409 | 417 | ||
410 | static size_t kernel_maps__fprintf(FILE *fp) | 418 | size_t kernel_maps__fprintf(FILE *fp) |
411 | { | 419 | { |
412 | size_t printed = fprintf(fp, "Kernel maps:\n"); | 420 | size_t printed = fprintf(fp, "Kernel maps:\n"); |
413 | struct rb_node *nd; | 421 | struct rb_node *nd; |
@@ -883,47 +891,40 @@ out_close: | |||
883 | return err; | 891 | return err; |
884 | } | 892 | } |
885 | 893 | ||
886 | bool fetch_build_id_table(struct list_head *head) | 894 | static bool dso__build_id_equal(const struct dso *self, u8 *build_id) |
887 | { | 895 | { |
888 | bool have_buildid = false; | 896 | return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; |
889 | struct dso *pos; | 897 | } |
890 | |||
891 | list_for_each_entry(pos, &dsos, node) { | ||
892 | struct build_id_list *new; | ||
893 | struct build_id_event b; | ||
894 | size_t len; | ||
895 | |||
896 | if (filename__read_build_id(pos->long_name, | ||
897 | &b.build_id, | ||
898 | sizeof(b.build_id)) < 0) | ||
899 | continue; | ||
900 | have_buildid = true; | ||
901 | memset(&b.header, 0, sizeof(b.header)); | ||
902 | len = pos->long_name_len + 1; | ||
903 | len = ALIGN(len, 64); | ||
904 | b.header.size = sizeof(b) + len; | ||
905 | |||
906 | new = malloc(sizeof(*new)); | ||
907 | if (!new) | ||
908 | die("No memory\n"); | ||
909 | 898 | ||
910 | memcpy(&new->event, &b, sizeof(b)); | 899 | bool dsos__read_build_ids(void) |
911 | new->dso_name = pos->long_name; | 900 | { |
912 | new->len = len; | 901 | bool have_build_id = false; |
902 | struct dso *pos; | ||
913 | 903 | ||
914 | list_add_tail(&new->list, head); | 904 | list_for_each_entry(pos, &dsos, node) |
915 | } | 905 | if (filename__read_build_id(pos->long_name, pos->build_id, |
906 | sizeof(pos->build_id)) > 0) { | ||
907 | have_build_id = true; | ||
908 | pos->has_build_id = true; | ||
909 | } | ||
916 | 910 | ||
917 | return have_buildid; | 911 | return have_build_id; |
918 | } | 912 | } |
919 | 913 | ||
914 | /* | ||
915 | * Align offset to 4 bytes as needed for note name and descriptor data. | ||
916 | */ | ||
917 | #define NOTE_ALIGN(n) (((n) + 3) & -4U) | ||
918 | |||
920 | int filename__read_build_id(const char *filename, void *bf, size_t size) | 919 | int filename__read_build_id(const char *filename, void *bf, size_t size) |
921 | { | 920 | { |
922 | int fd, err = -1; | 921 | int fd, err = -1; |
923 | GElf_Ehdr ehdr; | 922 | GElf_Ehdr ehdr; |
924 | GElf_Shdr shdr; | 923 | GElf_Shdr shdr; |
925 | Elf_Data *build_id_data; | 924 | Elf_Data *data; |
926 | Elf_Scn *sec; | 925 | Elf_Scn *sec; |
926 | Elf_Kind ek; | ||
927 | void *ptr; | ||
927 | Elf *elf; | 928 | Elf *elf; |
928 | 929 | ||
929 | if (size < BUILD_ID_SIZE) | 930 | if (size < BUILD_ID_SIZE) |
@@ -939,6 +940,10 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) | |||
939 | goto out_close; | 940 | goto out_close; |
940 | } | 941 | } |
941 | 942 | ||
943 | ek = elf_kind(elf); | ||
944 | if (ek != ELF_K_ELF) | ||
945 | goto out_elf_end; | ||
946 | |||
942 | if (gelf_getehdr(elf, &ehdr) == NULL) { | 947 | if (gelf_getehdr(elf, &ehdr) == NULL) { |
943 | pr_err("%s: cannot get elf header.\n", __func__); | 948 | pr_err("%s: cannot get elf header.\n", __func__); |
944 | goto out_elf_end; | 949 | goto out_elf_end; |
@@ -946,14 +951,37 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) | |||
946 | 951 | ||
947 | sec = elf_section_by_name(elf, &ehdr, &shdr, | 952 | sec = elf_section_by_name(elf, &ehdr, &shdr, |
948 | ".note.gnu.build-id", NULL); | 953 | ".note.gnu.build-id", NULL); |
949 | if (sec == NULL) | 954 | if (sec == NULL) { |
950 | goto out_elf_end; | 955 | sec = elf_section_by_name(elf, &ehdr, &shdr, |
956 | ".notes", NULL); | ||
957 | if (sec == NULL) | ||
958 | goto out_elf_end; | ||
959 | } | ||
951 | 960 | ||
952 | build_id_data = elf_getdata(sec, NULL); | 961 | data = elf_getdata(sec, NULL); |
953 | if (build_id_data == NULL) | 962 | if (data == NULL) |
954 | goto out_elf_end; | 963 | goto out_elf_end; |
955 | memcpy(bf, build_id_data->d_buf + 16, BUILD_ID_SIZE); | 964 | |
956 | err = BUILD_ID_SIZE; | 965 | ptr = data->d_buf; |
966 | while (ptr < (data->d_buf + data->d_size)) { | ||
967 | GElf_Nhdr *nhdr = ptr; | ||
968 | int namesz = NOTE_ALIGN(nhdr->n_namesz), | ||
969 | descsz = NOTE_ALIGN(nhdr->n_descsz); | ||
970 | const char *name; | ||
971 | |||
972 | ptr += sizeof(*nhdr); | ||
973 | name = ptr; | ||
974 | ptr += namesz; | ||
975 | if (nhdr->n_type == NT_GNU_BUILD_ID && | ||
976 | nhdr->n_namesz == sizeof("GNU")) { | ||
977 | if (memcmp(name, "GNU", sizeof("GNU")) == 0) { | ||
978 | memcpy(bf, ptr, BUILD_ID_SIZE); | ||
979 | err = BUILD_ID_SIZE; | ||
980 | break; | ||
981 | } | ||
982 | } | ||
983 | ptr += descsz; | ||
984 | } | ||
957 | out_elf_end: | 985 | out_elf_end: |
958 | elf_end(elf); | 986 | elf_end(elf); |
959 | out_close: | 987 | out_close: |
@@ -962,23 +990,48 @@ out: | |||
962 | return err; | 990 | return err; |
963 | } | 991 | } |
964 | 992 | ||
965 | static char *dso__read_build_id(struct dso *self) | 993 | int sysfs__read_build_id(const char *filename, void *build_id, size_t size) |
966 | { | 994 | { |
967 | int len; | 995 | int fd, err = -1; |
968 | char *build_id = NULL; | ||
969 | unsigned char rawbf[BUILD_ID_SIZE]; | ||
970 | 996 | ||
971 | len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf)); | 997 | if (size < BUILD_ID_SIZE) |
972 | if (len < 0) | ||
973 | goto out; | 998 | goto out; |
974 | 999 | ||
975 | build_id = malloc(len * 2 + 1); | 1000 | fd = open(filename, O_RDONLY); |
976 | if (build_id == NULL) | 1001 | if (fd < 0) |
977 | goto out; | 1002 | goto out; |
978 | 1003 | ||
979 | build_id__sprintf(rawbf, len, build_id); | 1004 | while (1) { |
1005 | char bf[BUFSIZ]; | ||
1006 | GElf_Nhdr nhdr; | ||
1007 | int namesz, descsz; | ||
1008 | |||
1009 | if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) | ||
1010 | break; | ||
1011 | |||
1012 | namesz = NOTE_ALIGN(nhdr.n_namesz); | ||
1013 | descsz = NOTE_ALIGN(nhdr.n_descsz); | ||
1014 | if (nhdr.n_type == NT_GNU_BUILD_ID && | ||
1015 | nhdr.n_namesz == sizeof("GNU")) { | ||
1016 | if (read(fd, bf, namesz) != namesz) | ||
1017 | break; | ||
1018 | if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { | ||
1019 | if (read(fd, build_id, | ||
1020 | BUILD_ID_SIZE) == BUILD_ID_SIZE) { | ||
1021 | err = 0; | ||
1022 | break; | ||
1023 | } | ||
1024 | } else if (read(fd, bf, descsz) != descsz) | ||
1025 | break; | ||
1026 | } else { | ||
1027 | int n = namesz + descsz; | ||
1028 | if (read(fd, bf, n) != n) | ||
1029 | break; | ||
1030 | } | ||
1031 | } | ||
1032 | close(fd); | ||
980 | out: | 1033 | out: |
981 | return build_id; | 1034 | return err; |
982 | } | 1035 | } |
983 | 1036 | ||
984 | char dso__symtab_origin(const struct dso *self) | 1037 | char dso__symtab_origin(const struct dso *self) |
@@ -1001,12 +1054,17 @@ char dso__symtab_origin(const struct dso *self) | |||
1001 | int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) | 1054 | int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) |
1002 | { | 1055 | { |
1003 | int size = PATH_MAX; | 1056 | int size = PATH_MAX; |
1004 | char *name = malloc(size), *build_id = NULL; | 1057 | char *name; |
1058 | u8 build_id[BUILD_ID_SIZE]; | ||
1005 | int ret = -1; | 1059 | int ret = -1; |
1006 | int fd; | 1060 | int fd; |
1007 | 1061 | ||
1008 | self->loaded = 1; | 1062 | self->loaded = 1; |
1009 | 1063 | ||
1064 | if (self->kernel) | ||
1065 | return dso__load_kernel_sym(self, map, filter); | ||
1066 | |||
1067 | name = malloc(size); | ||
1010 | if (!name) | 1068 | if (!name) |
1011 | return -1; | 1069 | return -1; |
1012 | 1070 | ||
@@ -1023,8 +1081,6 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) | |||
1023 | 1081 | ||
1024 | more: | 1082 | more: |
1025 | do { | 1083 | do { |
1026 | int berr = 0; | ||
1027 | |||
1028 | self->origin++; | 1084 | self->origin++; |
1029 | switch (self->origin) { | 1085 | switch (self->origin) { |
1030 | case DSO__ORIG_FEDORA: | 1086 | case DSO__ORIG_FEDORA: |
@@ -1036,12 +1092,18 @@ more: | |||
1036 | self->long_name); | 1092 | self->long_name); |
1037 | break; | 1093 | break; |
1038 | case DSO__ORIG_BUILDID: | 1094 | case DSO__ORIG_BUILDID: |
1039 | build_id = dso__read_build_id(self); | 1095 | if (filename__read_build_id(self->long_name, build_id, |
1040 | if (build_id != NULL) { | 1096 | sizeof(build_id))) { |
1097 | char build_id_hex[BUILD_ID_SIZE * 2 + 1]; | ||
1098 | |||
1099 | build_id__sprintf(build_id, sizeof(build_id), | ||
1100 | build_id_hex); | ||
1041 | snprintf(name, size, | 1101 | snprintf(name, size, |
1042 | "/usr/lib/debug/.build-id/%.2s/%s.debug", | 1102 | "/usr/lib/debug/.build-id/%.2s/%s.debug", |
1043 | build_id, build_id + 2); | 1103 | build_id_hex, build_id_hex + 2); |
1044 | goto compare_build_id; | 1104 | if (self->has_build_id) |
1105 | goto compare_build_id; | ||
1106 | break; | ||
1045 | } | 1107 | } |
1046 | self->origin++; | 1108 | self->origin++; |
1047 | /* Fall thru */ | 1109 | /* Fall thru */ |
@@ -1054,18 +1116,11 @@ more: | |||
1054 | } | 1116 | } |
1055 | 1117 | ||
1056 | if (self->has_build_id) { | 1118 | if (self->has_build_id) { |
1057 | bool match; | 1119 | if (filename__read_build_id(name, build_id, |
1058 | build_id = malloc(BUILD_ID_SIZE); | 1120 | sizeof(build_id)) < 0) |
1059 | if (build_id == NULL) | ||
1060 | goto more; | 1121 | goto more; |
1061 | berr = filename__read_build_id(name, build_id, | ||
1062 | BUILD_ID_SIZE); | ||
1063 | compare_build_id: | 1122 | compare_build_id: |
1064 | match = berr > 0 && memcmp(build_id, self->build_id, | 1123 | if (!dso__build_id_equal(self, build_id)) |
1065 | sizeof(self->build_id)) == 0; | ||
1066 | free(build_id); | ||
1067 | build_id = NULL; | ||
1068 | if (!match) | ||
1069 | goto more; | 1124 | goto more; |
1070 | } | 1125 | } |
1071 | 1126 | ||
@@ -1100,7 +1155,8 @@ static void kernel_maps__insert(struct map *map) | |||
1100 | maps__insert(&kernel_maps, map); | 1155 | maps__insert(&kernel_maps, map); |
1101 | } | 1156 | } |
1102 | 1157 | ||
1103 | struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) | 1158 | struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, |
1159 | symbol_filter_t filter) | ||
1104 | { | 1160 | { |
1105 | struct map *map = maps__find(&kernel_maps, ip); | 1161 | struct map *map = maps__find(&kernel_maps, ip); |
1106 | 1162 | ||
@@ -1109,7 +1165,7 @@ struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) | |||
1109 | 1165 | ||
1110 | if (map) { | 1166 | if (map) { |
1111 | ip = map->map_ip(map, ip); | 1167 | ip = map->map_ip(map, ip); |
1112 | return map->dso->find_symbol(map->dso, ip); | 1168 | return map__find_symbol(map, ip, filter); |
1113 | } | 1169 | } |
1114 | 1170 | ||
1115 | return NULL; | 1171 | return NULL; |
@@ -1129,32 +1185,13 @@ struct map *kernel_maps__find_by_dso_name(const char *name) | |||
1129 | return NULL; | 1185 | return NULL; |
1130 | } | 1186 | } |
1131 | 1187 | ||
1132 | static int dso__load_module_sym(struct dso *self, struct map *map, | 1188 | static int dsos__set_modules_path_dir(char *dirname) |
1133 | symbol_filter_t filter) | ||
1134 | { | ||
1135 | int err = 0, fd = open(self->long_name, O_RDONLY); | ||
1136 | |||
1137 | self->loaded = 1; | ||
1138 | |||
1139 | if (fd < 0) { | ||
1140 | pr_err("%s: cannot open %s\n", __func__, self->long_name); | ||
1141 | return err; | ||
1142 | } | ||
1143 | |||
1144 | err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1); | ||
1145 | close(fd); | ||
1146 | |||
1147 | return err; | ||
1148 | } | ||
1149 | |||
1150 | static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) | ||
1151 | { | 1189 | { |
1152 | struct dirent *dent; | 1190 | struct dirent *dent; |
1153 | int nr_symbols = 0, err; | ||
1154 | DIR *dir = opendir(dirname); | 1191 | DIR *dir = opendir(dirname); |
1155 | 1192 | ||
1156 | if (!dir) { | 1193 | if (!dir) { |
1157 | pr_err("%s: cannot open %s dir\n", __func__, dirname); | 1194 | pr_debug("%s: cannot open %s dir\n", __func__, dirname); |
1158 | return -1; | 1195 | return -1; |
1159 | } | 1196 | } |
1160 | 1197 | ||
@@ -1168,14 +1205,12 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) | |||
1168 | 1205 | ||
1169 | snprintf(path, sizeof(path), "%s/%s", | 1206 | snprintf(path, sizeof(path), "%s/%s", |
1170 | dirname, dent->d_name); | 1207 | dirname, dent->d_name); |
1171 | err = dsos__load_modules_sym_dir(path, filter); | 1208 | if (dsos__set_modules_path_dir(path) < 0) |
1172 | if (err < 0) | ||
1173 | goto failure; | 1209 | goto failure; |
1174 | } else { | 1210 | } else { |
1175 | char *dot = strrchr(dent->d_name, '.'), | 1211 | char *dot = strrchr(dent->d_name, '.'), |
1176 | dso_name[PATH_MAX]; | 1212 | dso_name[PATH_MAX]; |
1177 | struct map *map; | 1213 | struct map *map; |
1178 | struct rb_node *last; | ||
1179 | char *long_name; | 1214 | char *long_name; |
1180 | 1215 | ||
1181 | if (dot == NULL || strcmp(dot, ".ko")) | 1216 | if (dot == NULL || strcmp(dot, ".ko")) |
@@ -1195,36 +1230,16 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) | |||
1195 | if (long_name == NULL) | 1230 | if (long_name == NULL) |
1196 | goto failure; | 1231 | goto failure; |
1197 | dso__set_long_name(map->dso, long_name); | 1232 | dso__set_long_name(map->dso, long_name); |
1198 | dso__set_basename(map->dso); | ||
1199 | |||
1200 | err = dso__load_module_sym(map->dso, map, filter); | ||
1201 | if (err < 0) | ||
1202 | goto failure; | ||
1203 | last = rb_last(&map->dso->syms); | ||
1204 | if (last) { | ||
1205 | struct symbol *sym; | ||
1206 | /* | ||
1207 | * We do this here as well, even having the | ||
1208 | * symbol size found in the symtab because | ||
1209 | * misannotated ASM symbols may have the size | ||
1210 | * set to zero. | ||
1211 | */ | ||
1212 | dso__fixup_sym_end(map->dso); | ||
1213 | |||
1214 | sym = rb_entry(last, struct symbol, rb_node); | ||
1215 | map->end = map->start + sym->end; | ||
1216 | } | ||
1217 | } | 1233 | } |
1218 | nr_symbols += err; | ||
1219 | } | 1234 | } |
1220 | 1235 | ||
1221 | return nr_symbols; | 1236 | return 0; |
1222 | failure: | 1237 | failure: |
1223 | closedir(dir); | 1238 | closedir(dir); |
1224 | return -1; | 1239 | return -1; |
1225 | } | 1240 | } |
1226 | 1241 | ||
1227 | static int dsos__load_modules_sym(symbol_filter_t filter) | 1242 | static int dsos__set_modules_path(void) |
1228 | { | 1243 | { |
1229 | struct utsname uts; | 1244 | struct utsname uts; |
1230 | char modules_path[PATH_MAX]; | 1245 | char modules_path[PATH_MAX]; |
@@ -1235,7 +1250,7 @@ static int dsos__load_modules_sym(symbol_filter_t filter) | |||
1235 | snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", | 1250 | snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", |
1236 | uts.release); | 1251 | uts.release); |
1237 | 1252 | ||
1238 | return dsos__load_modules_sym_dir(modules_path, filter); | 1253 | return dsos__set_modules_path_dir(modules_path); |
1239 | } | 1254 | } |
1240 | 1255 | ||
1241 | /* | 1256 | /* |
@@ -1257,7 +1272,7 @@ static struct map *map__new2(u64 start, struct dso *dso) | |||
1257 | return self; | 1272 | return self; |
1258 | } | 1273 | } |
1259 | 1274 | ||
1260 | static int dsos__load_modules(void) | 1275 | static int kernel_maps__create_module_maps(void) |
1261 | { | 1276 | { |
1262 | char *line = NULL; | 1277 | char *line = NULL; |
1263 | size_t n; | 1278 | size_t n; |
@@ -1307,6 +1322,12 @@ static int dsos__load_modules(void) | |||
1307 | goto out_delete_line; | 1322 | goto out_delete_line; |
1308 | } | 1323 | } |
1309 | 1324 | ||
1325 | snprintf(name, sizeof(name), | ||
1326 | "/sys/module/%s/notes/.note.gnu.build-id", line); | ||
1327 | if (sysfs__read_build_id(name, dso->build_id, | ||
1328 | sizeof(dso->build_id)) == 0) | ||
1329 | dso->has_build_id = true; | ||
1330 | |||
1310 | dso->origin = DSO__ORIG_KMODULE; | 1331 | dso->origin = DSO__ORIG_KMODULE; |
1311 | kernel_maps__insert(map); | 1332 | kernel_maps__insert(map); |
1312 | dsos__add(dso); | 1333 | dsos__add(dso); |
@@ -1315,7 +1336,7 @@ static int dsos__load_modules(void) | |||
1315 | free(line); | 1336 | free(line); |
1316 | fclose(file); | 1337 | fclose(file); |
1317 | 1338 | ||
1318 | return 0; | 1339 | return dsos__set_modules_path(); |
1319 | 1340 | ||
1320 | out_delete_line: | 1341 | out_delete_line: |
1321 | free(line); | 1342 | free(line); |
@@ -1326,13 +1347,37 @@ out_failure: | |||
1326 | static int dso__load_vmlinux(struct dso *self, struct map *map, | 1347 | static int dso__load_vmlinux(struct dso *self, struct map *map, |
1327 | const char *vmlinux, symbol_filter_t filter) | 1348 | const char *vmlinux, symbol_filter_t filter) |
1328 | { | 1349 | { |
1329 | int err, fd = open(vmlinux, O_RDONLY); | 1350 | int err = -1, fd; |
1330 | 1351 | ||
1331 | self->loaded = 1; | 1352 | if (self->has_build_id) { |
1353 | u8 build_id[BUILD_ID_SIZE]; | ||
1354 | |||
1355 | if (filename__read_build_id(vmlinux, build_id, | ||
1356 | sizeof(build_id)) < 0) { | ||
1357 | pr_debug("No build_id in %s, ignoring it\n", vmlinux); | ||
1358 | return -1; | ||
1359 | } | ||
1360 | if (!dso__build_id_equal(self, build_id)) { | ||
1361 | char expected_build_id[BUILD_ID_SIZE * 2 + 1], | ||
1362 | vmlinux_build_id[BUILD_ID_SIZE * 2 + 1]; | ||
1363 | |||
1364 | build_id__sprintf(self->build_id, | ||
1365 | sizeof(self->build_id), | ||
1366 | expected_build_id); | ||
1367 | build_id__sprintf(build_id, sizeof(build_id), | ||
1368 | vmlinux_build_id); | ||
1369 | pr_debug("build_id in %s is %s while expected is %s, " | ||
1370 | "ignoring it\n", vmlinux, vmlinux_build_id, | ||
1371 | expected_build_id); | ||
1372 | return -1; | ||
1373 | } | ||
1374 | } | ||
1332 | 1375 | ||
1376 | fd = open(vmlinux, O_RDONLY); | ||
1333 | if (fd < 0) | 1377 | if (fd < 0) |
1334 | return -1; | 1378 | return -1; |
1335 | 1379 | ||
1380 | self->loaded = 1; | ||
1336 | err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); | 1381 | err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); |
1337 | 1382 | ||
1338 | close(fd); | 1383 | close(fd); |
@@ -1340,78 +1385,55 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, | |||
1340 | return err; | 1385 | return err; |
1341 | } | 1386 | } |
1342 | 1387 | ||
1343 | int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, | 1388 | static int dso__load_kernel_sym(struct dso *self, struct map *map, |
1344 | int use_modules) | 1389 | symbol_filter_t filter) |
1345 | { | 1390 | { |
1346 | int err = -1; | 1391 | int err; |
1347 | struct dso *dso = dso__new(vmlinux); | 1392 | bool is_kallsyms; |
1348 | 1393 | ||
1349 | if (dso == NULL) | 1394 | if (vmlinux_path != NULL) { |
1350 | return -1; | 1395 | int i; |
1351 | 1396 | pr_debug("Looking at the vmlinux_path (%d entries long)\n", | |
1352 | dso->short_name = "[kernel]"; | 1397 | vmlinux_path__nr_entries); |
1353 | kernel_map = map__new2(0, dso); | 1398 | for (i = 0; i < vmlinux_path__nr_entries; ++i) { |
1354 | if (kernel_map == NULL) | 1399 | err = dso__load_vmlinux(self, map, vmlinux_path[i], |
1355 | goto out_delete_dso; | 1400 | filter); |
1356 | 1401 | if (err > 0) { | |
1357 | kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; | 1402 | pr_debug("Using %s for symbols\n", |
1358 | 1403 | vmlinux_path[i]); | |
1359 | if (use_modules && dsos__load_modules() < 0) { | 1404 | dso__set_long_name(self, |
1360 | pr_warning("Failed to load list of modules in use! " | 1405 | strdup(vmlinux_path[i])); |
1361 | "Continuing...\n"); | 1406 | goto out_fixup; |
1362 | use_modules = 0; | 1407 | } |
1363 | } | ||
1364 | |||
1365 | if (vmlinux) { | ||
1366 | err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter); | ||
1367 | if (err > 0 && use_modules) { | ||
1368 | int syms = dsos__load_modules_sym(filter); | ||
1369 | |||
1370 | if (syms < 0) | ||
1371 | pr_warning("Failed to read module symbols!" | ||
1372 | " Continuing...\n"); | ||
1373 | else | ||
1374 | err += syms; | ||
1375 | } | 1408 | } |
1376 | } | 1409 | } |
1377 | 1410 | ||
1378 | if (err <= 0) | 1411 | is_kallsyms = self->long_name[0] == '['; |
1379 | err = kernel_maps__load_kallsyms(filter, use_modules); | 1412 | if (is_kallsyms) |
1413 | goto do_kallsyms; | ||
1414 | |||
1415 | err = dso__load_vmlinux(self, map, self->long_name, filter); | ||
1416 | if (err <= 0) { | ||
1417 | pr_info("The file %s cannot be used, " | ||
1418 | "trying to use /proc/kallsyms...", self->long_name); | ||
1419 | sleep(2); | ||
1420 | do_kallsyms: | ||
1421 | err = kernel_maps__load_kallsyms(filter); | ||
1422 | if (err > 0 && !is_kallsyms) | ||
1423 | dso__set_long_name(self, strdup("[kernel.kallsyms]")); | ||
1424 | } | ||
1380 | 1425 | ||
1381 | if (err > 0) { | 1426 | if (err > 0) { |
1382 | struct rb_node *node = rb_first(&dso->syms); | 1427 | out_fixup: |
1383 | struct symbol *sym = rb_entry(node, struct symbol, rb_node); | 1428 | map__fixup_start(map); |
1384 | 1429 | map__fixup_end(map); | |
1385 | kernel_map->start = sym->start; | ||
1386 | node = rb_last(&dso->syms); | ||
1387 | sym = rb_entry(node, struct symbol, rb_node); | ||
1388 | kernel_map->end = sym->end; | ||
1389 | |||
1390 | dso->origin = DSO__ORIG_KERNEL; | ||
1391 | kernel_maps__insert(kernel_map); | ||
1392 | /* | ||
1393 | * Now that we have all sorted out, just set the ->end of all | ||
1394 | * maps: | ||
1395 | */ | ||
1396 | kernel_maps__fixup_end(); | ||
1397 | dsos__add(dso); | ||
1398 | |||
1399 | if (verbose) | ||
1400 | kernel_maps__fprintf(stderr); | ||
1401 | } | 1430 | } |
1402 | 1431 | ||
1403 | return err; | 1432 | return err; |
1404 | |||
1405 | out_delete_dso: | ||
1406 | dso__delete(dso); | ||
1407 | return -1; | ||
1408 | } | 1433 | } |
1409 | 1434 | ||
1410 | LIST_HEAD(dsos); | 1435 | LIST_HEAD(dsos); |
1411 | struct dso *vdso; | 1436 | struct dso *vdso; |
1412 | |||
1413 | const char *vmlinux_name = "vmlinux"; | ||
1414 | int modules; | ||
1415 | 1437 | ||
1416 | static void dsos__add(struct dso *dso) | 1438 | static void dsos__add(struct dso *dso) |
1417 | { | 1439 | { |
@@ -1463,18 +1485,117 @@ size_t dsos__fprintf_buildid(FILE *fp) | |||
1463 | return ret; | 1485 | return ret; |
1464 | } | 1486 | } |
1465 | 1487 | ||
1466 | int load_kernel(symbol_filter_t filter) | 1488 | static int kernel_maps__create_kernel_map(const char *vmlinux_name) |
1467 | { | 1489 | { |
1468 | if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0) | 1490 | struct dso *kernel = dso__new(vmlinux_name ?: "[kernel.kallsyms]"); |
1491 | |||
1492 | if (kernel == NULL) | ||
1469 | return -1; | 1493 | return -1; |
1470 | 1494 | ||
1495 | kernel_map = map__new2(0, kernel); | ||
1496 | if (kernel_map == NULL) | ||
1497 | goto out_delete_kernel_dso; | ||
1498 | |||
1499 | kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; | ||
1500 | kernel->short_name = "[kernel]"; | ||
1501 | kernel->kernel = 1; | ||
1502 | |||
1471 | vdso = dso__new("[vdso]"); | 1503 | vdso = dso__new("[vdso]"); |
1472 | if (!vdso) | 1504 | if (vdso == NULL) |
1473 | return -1; | 1505 | goto out_delete_kernel_map; |
1506 | |||
1507 | if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, | ||
1508 | sizeof(kernel->build_id)) == 0) | ||
1509 | kernel->has_build_id = true; | ||
1474 | 1510 | ||
1511 | kernel_maps__insert(kernel_map); | ||
1512 | dsos__add(kernel); | ||
1475 | dsos__add(vdso); | 1513 | dsos__add(vdso); |
1476 | 1514 | ||
1477 | return 0; | 1515 | return 0; |
1516 | |||
1517 | out_delete_kernel_map: | ||
1518 | map__delete(kernel_map); | ||
1519 | kernel_map = NULL; | ||
1520 | out_delete_kernel_dso: | ||
1521 | dso__delete(kernel); | ||
1522 | return -1; | ||
1523 | } | ||
1524 | |||
1525 | static void vmlinux_path__exit(void) | ||
1526 | { | ||
1527 | while (--vmlinux_path__nr_entries >= 0) { | ||
1528 | free(vmlinux_path[vmlinux_path__nr_entries]); | ||
1529 | vmlinux_path[vmlinux_path__nr_entries] = NULL; | ||
1530 | } | ||
1531 | |||
1532 | free(vmlinux_path); | ||
1533 | vmlinux_path = NULL; | ||
1534 | } | ||
1535 | |||
1536 | static int vmlinux_path__init(void) | ||
1537 | { | ||
1538 | struct utsname uts; | ||
1539 | char bf[PATH_MAX]; | ||
1540 | |||
1541 | if (uname(&uts) < 0) | ||
1542 | return -1; | ||
1543 | |||
1544 | vmlinux_path = malloc(sizeof(char *) * 5); | ||
1545 | if (vmlinux_path == NULL) | ||
1546 | return -1; | ||
1547 | |||
1548 | vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux"); | ||
1549 | if (vmlinux_path[vmlinux_path__nr_entries] == NULL) | ||
1550 | goto out_fail; | ||
1551 | ++vmlinux_path__nr_entries; | ||
1552 | vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux"); | ||
1553 | if (vmlinux_path[vmlinux_path__nr_entries] == NULL) | ||
1554 | goto out_fail; | ||
1555 | ++vmlinux_path__nr_entries; | ||
1556 | snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); | ||
1557 | vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); | ||
1558 | if (vmlinux_path[vmlinux_path__nr_entries] == NULL) | ||
1559 | goto out_fail; | ||
1560 | ++vmlinux_path__nr_entries; | ||
1561 | snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release); | ||
1562 | vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); | ||
1563 | if (vmlinux_path[vmlinux_path__nr_entries] == NULL) | ||
1564 | goto out_fail; | ||
1565 | ++vmlinux_path__nr_entries; | ||
1566 | snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux", | ||
1567 | uts.release); | ||
1568 | vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); | ||
1569 | if (vmlinux_path[vmlinux_path__nr_entries] == NULL) | ||
1570 | goto out_fail; | ||
1571 | ++vmlinux_path__nr_entries; | ||
1572 | |||
1573 | return 0; | ||
1574 | |||
1575 | out_fail: | ||
1576 | vmlinux_path__exit(); | ||
1577 | return -1; | ||
1578 | } | ||
1579 | |||
1580 | int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, | ||
1581 | bool use_modules) | ||
1582 | { | ||
1583 | if (try_vmlinux_path && vmlinux_path__init() < 0) | ||
1584 | return -1; | ||
1585 | |||
1586 | if (kernel_maps__create_kernel_map(vmlinux_name) < 0) { | ||
1587 | vmlinux_path__exit(); | ||
1588 | return -1; | ||
1589 | } | ||
1590 | |||
1591 | if (use_modules && kernel_maps__create_module_maps() < 0) | ||
1592 | pr_debug("Failed to load list of modules in use, " | ||
1593 | "continuing...\n"); | ||
1594 | /* | ||
1595 | * Now that we have all the maps created, just set the ->end of them: | ||
1596 | */ | ||
1597 | kernel_maps__fixup_end(); | ||
1598 | return 0; | ||
1478 | } | 1599 | } |
1479 | 1600 | ||
1480 | void symbol__init(unsigned int priv_size) | 1601 | void symbol__init(unsigned int priv_size) |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5ad1019607dd..8c4d026e067a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -64,6 +64,7 @@ struct dso { | |||
64 | u8 slen_calculated:1; | 64 | u8 slen_calculated:1; |
65 | u8 loaded:1; | 65 | u8 loaded:1; |
66 | u8 has_build_id:1; | 66 | u8 has_build_id:1; |
67 | u8 kernel:1; | ||
67 | unsigned char origin; | 68 | unsigned char origin; |
68 | u8 build_id[BUILD_ID_SIZE]; | 69 | u8 build_id[BUILD_ID_SIZE]; |
69 | u16 long_name_len; | 70 | u16 long_name_len; |
@@ -77,7 +78,6 @@ void dso__delete(struct dso *self); | |||
77 | 78 | ||
78 | struct symbol *dso__find_symbol(struct dso *self, u64 ip); | 79 | struct symbol *dso__find_symbol(struct dso *self, u64 ip); |
79 | 80 | ||
80 | int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules); | ||
81 | struct dso *dsos__findnew(const char *name); | 81 | struct dso *dsos__findnew(const char *name); |
82 | int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); | 82 | int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); |
83 | void dsos__fprintf(FILE *fp); | 83 | void dsos__fprintf(FILE *fp); |
@@ -89,16 +89,17 @@ char dso__symtab_origin(const struct dso *self); | |||
89 | void dso__set_build_id(struct dso *self, void *build_id); | 89 | void dso__set_build_id(struct dso *self, void *build_id); |
90 | 90 | ||
91 | int filename__read_build_id(const char *filename, void *bf, size_t size); | 91 | int filename__read_build_id(const char *filename, void *bf, size_t size); |
92 | bool fetch_build_id_table(struct list_head *head); | 92 | int sysfs__read_build_id(const char *filename, void *bf, size_t size); |
93 | bool dsos__read_build_ids(void); | ||
93 | int build_id__sprintf(u8 *self, int len, char *bf); | 94 | int build_id__sprintf(u8 *self, int len, char *bf); |
94 | 95 | ||
95 | int load_kernel(symbol_filter_t filter); | 96 | int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, |
97 | bool use_modules); | ||
98 | size_t kernel_maps__fprintf(FILE *fp); | ||
96 | 99 | ||
97 | void symbol__init(unsigned int priv_size); | 100 | void symbol__init(unsigned int priv_size); |
98 | 101 | ||
99 | extern struct list_head dsos; | 102 | extern struct list_head dsos; |
100 | extern struct map *kernel_map; | 103 | extern struct map *kernel_map; |
101 | extern struct dso *vdso; | 104 | extern struct dso *vdso; |
102 | extern const char *vmlinux_name; | ||
103 | extern int modules; | ||
104 | #endif /* __PERF_SYMBOL */ | 105 | #endif /* __PERF_SYMBOL */ |
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 53addd77ce8f..e4b8d437725a 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h | |||
@@ -26,7 +26,8 @@ size_t threads__fprintf(FILE *fp); | |||
26 | void maps__insert(struct rb_root *maps, struct map *map); | 26 | void maps__insert(struct rb_root *maps, struct map *map); |
27 | struct map *maps__find(struct rb_root *maps, u64 ip); | 27 | struct map *maps__find(struct rb_root *maps, u64 ip); |
28 | 28 | ||
29 | struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp); | 29 | struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp, |
30 | symbol_filter_t filter); | ||
30 | struct map *kernel_maps__find_by_dso_name(const char *name); | 31 | struct map *kernel_maps__find_by_dso_name(const char *name); |
31 | 32 | ||
32 | static inline struct map *thread__find_map(struct thread *self, u64 ip) | 33 | static inline struct map *thread__find_map(struct thread *self, u64 ip) |
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 831052d4b4fb..cace35595530 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c | |||
@@ -33,11 +33,11 @@ | |||
33 | #include <ctype.h> | 33 | #include <ctype.h> |
34 | #include <errno.h> | 34 | #include <errno.h> |
35 | #include <stdbool.h> | 35 | #include <stdbool.h> |
36 | #include <linux/kernel.h> | ||
36 | 37 | ||
37 | #include "../perf.h" | 38 | #include "../perf.h" |
38 | #include "trace-event.h" | 39 | #include "trace-event.h" |
39 | 40 | ||
40 | |||
41 | #define VERSION "0.5" | 41 | #define VERSION "0.5" |
42 | 42 | ||
43 | #define _STR(x) #x | 43 | #define _STR(x) #x |
@@ -483,23 +483,31 @@ static struct tracepoint_path * | |||
483 | get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) | 483 | get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) |
484 | { | 484 | { |
485 | struct tracepoint_path path, *ppath = &path; | 485 | struct tracepoint_path path, *ppath = &path; |
486 | int i; | 486 | int i, nr_tracepoints = 0; |
487 | 487 | ||
488 | for (i = 0; i < nb_events; i++) { | 488 | for (i = 0; i < nb_events; i++) { |
489 | if (pattrs[i].type != PERF_TYPE_TRACEPOINT) | 489 | if (pattrs[i].type != PERF_TYPE_TRACEPOINT) |
490 | continue; | 490 | continue; |
491 | ++nr_tracepoints; | ||
491 | ppath->next = tracepoint_id_to_path(pattrs[i].config); | 492 | ppath->next = tracepoint_id_to_path(pattrs[i].config); |
492 | if (!ppath->next) | 493 | if (!ppath->next) |
493 | die("%s\n", "No memory to alloc tracepoints list"); | 494 | die("%s\n", "No memory to alloc tracepoints list"); |
494 | ppath = ppath->next; | 495 | ppath = ppath->next; |
495 | } | 496 | } |
496 | 497 | ||
497 | return path.next; | 498 | return nr_tracepoints > 0 ? path.next : NULL; |
498 | } | 499 | } |
499 | void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) | 500 | |
501 | int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) | ||
500 | { | 502 | { |
501 | char buf[BUFSIZ]; | 503 | char buf[BUFSIZ]; |
502 | struct tracepoint_path *tps; | 504 | struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events); |
505 | |||
506 | /* | ||
507 | * What? No tracepoints? No sense writing anything here, bail out. | ||
508 | */ | ||
509 | if (tps == NULL) | ||
510 | return -1; | ||
503 | 511 | ||
504 | output_fd = fd; | 512 | output_fd = fd; |
505 | 513 | ||
@@ -528,11 +536,11 @@ void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) | |||
528 | page_size = getpagesize(); | 536 | page_size = getpagesize(); |
529 | write_or_die(&page_size, 4); | 537 | write_or_die(&page_size, 4); |
530 | 538 | ||
531 | tps = get_tracepoints_path(pattrs, nb_events); | ||
532 | |||
533 | read_header_files(); | 539 | read_header_files(); |
534 | read_ftrace_files(tps); | 540 | read_ftrace_files(tps); |
535 | read_event_files(tps); | 541 | read_event_files(tps); |
536 | read_proc_kallsyms(); | 542 | read_proc_kallsyms(); |
537 | read_ftrace_printk(); | 543 | read_ftrace_printk(); |
544 | |||
545 | return 0; | ||
538 | } | 546 | } |
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 44292e06cca4..342dfdd43f87 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c | |||
@@ -471,11 +471,11 @@ void trace_report(int fd) | |||
471 | 471 | ||
472 | read_or_die(buf, 3); | 472 | read_or_die(buf, 3); |
473 | if (memcmp(buf, test, 3) != 0) | 473 | if (memcmp(buf, test, 3) != 0) |
474 | die("not an trace data file"); | 474 | die("no trace data in the file"); |
475 | 475 | ||
476 | read_or_die(buf, 7); | 476 | read_or_die(buf, 7); |
477 | if (memcmp(buf, "tracing", 7) != 0) | 477 | if (memcmp(buf, "tracing", 7) != 0) |
478 | die("not a trace file (missing tracing)"); | 478 | die("not a trace file (missing 'tracing' tag)"); |
479 | 479 | ||
480 | version = read_string(); | 480 | version = read_string(); |
481 | if (show_version) | 481 | if (show_version) |
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index f6637c2fa1fe..dd51c6872a15 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h | |||
@@ -248,7 +248,7 @@ unsigned long long | |||
248 | raw_field_value(struct event *event, const char *name, void *data); | 248 | raw_field_value(struct event *event, const char *name, void *data); |
249 | void *raw_field_ptr(struct event *event, const char *name, void *data); | 249 | void *raw_field_ptr(struct event *event, const char *name, void *data); |
250 | 250 | ||
251 | void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); | 251 | int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); |
252 | 252 | ||
253 | /* taken from kernel/trace/trace.h */ | 253 | /* taken from kernel/trace/trace.h */ |
254 | enum trace_flag_type { | 254 | enum trace_flag_type { |
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f2203a0946bc..e1c623e0c99e 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -84,6 +84,9 @@ | |||
84 | #include <iconv.h> | 84 | #include <iconv.h> |
85 | #endif | 85 | #endif |
86 | 86 | ||
87 | extern const char *graph_line; | ||
88 | extern const char *graph_dotted_line; | ||
89 | |||
87 | /* On most systems <limits.h> would have given us this, but | 90 | /* On most systems <limits.h> would have given us this, but |
88 | * not on some systems (e.g. GNU/Hurd). | 91 | * not on some systems (e.g. GNU/Hurd). |
89 | */ | 92 | */ |