Merge branch 'tracing/hw-breakpoints' into perf/core

author		Ingo Molnar <mingo@elte.hu>	2009-11-21 08:07:23 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-11-21 08:07:23 -0500
commit		96200591a34f8ecb98481c626125df43a2463b55 (patch)
tree		314c376b01f254d04f9aaf449b1f9147ad177fa6
parent		7031281e02bf951a2259849217193fb9d75a9762 (diff)
parent		68efa37df779b3e04280598e8b5b3a1919b65fee (diff)

Conflicts:
	arch/x86/kernel/kprobes.c
	kernel/trace/Makefile

Merge reason: hw-breakpoints perf integration is looking good in testing
and in reviews, plus conflicts are mounting up - so merge & resolve.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

 39 files changed, 2512 insertions(+), 217 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
 
+config HAVE_HW_BREAKPOINT
+	bool
+	depends on HAVE_PERF_EVENTS
+	select ANON_INODES
+	select PERF_EVENTS
+
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72ace9515a07..178084b4377c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_HW_BREAKPOINT
 	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..7a15588e45d4 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
 
 #include <linux/user.h>
 #include <linux/elfcore.h>
+#include <asm/debugreg.h>
 
 /*
  * fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
 			>> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
-	dump->u_debugreg[4] = 0;
-	dump->u_debugreg[5] = 0;
-	dump->u_debugreg[6] = current->thread.debugreg6;
-	dump->u_debugreg[7] = current->thread.debugreg7;
+	aout_dump_debugregs(dump);
 
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..fdabd8435765 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
 #define DR_TRAP3	(0x8)		/* db3 */
+#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
 #define DR_STEP		(0x4000)	/* single-step */
 #define DR_SWITCH	(0x8000)	/* task switch */
@@ -49,6 +50,8 @@
 
 #define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
 #define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
 #define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
 
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set local bits for all 4 regs */
@@ -67,4 +70,34 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+	/* Zero the control register for HW Breakpoint */
+	set_debugreg(0UL, 7);
+
+	/* Zero-out the individual HW breakpoint address registers */
+	set_debugreg(0UL, 0);
+	set_debugreg(0UL, 1);
+	set_debugreg(0UL, 2);
+	set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+	return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..0675a7c4c20e
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
+#ifndef _I386_HW_BREAKPOINT_H
+#define _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define __ARCH_HW_BREAKPOINT_H
+
+/*
+ * The name should probably be something dealt with at
+ * a higher level, while dealing with the user
+ * (display/resolving)
+ */
+struct arch_hw_breakpoint {
+	char		*name; /* Contains name of the symbol to set bkpt */
+	unsigned long	address;
+	u8		len;
+	u8		type;
+};
+
+#include <linux/kdebug.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
+
+/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_1		0x40
+#define X86_BREAKPOINT_LEN_2		0x44
+#define X86_BREAKPOINT_LEN_4		0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE	0x40
+
+#ifdef CONFIG_X86_64
+#define X86_BREAKPOINT_LEN_8		0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define X86_BREAKPOINT_EXECUTE	0x80
+/* trigger on memory write */
+#define X86_BREAKPOINT_WRITE	0x81
+/* trigger on memory read or write */
+#define X86_BREAKPOINT_RW	0x83
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 4
+
+struct perf_event;
+struct pmu;
+
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					   unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+				  int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
+#endif	/* __KERNEL__ */
+#endif	/* _I386_HW_BREAKPOINT_H */
+
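A note on the encodings above: the X86_BREAKPOINT_LEN_* and X86_BREAKPOINT_* constants carry marker bits (0x40 for lengths, 0x80 for types) so that zero is never a valid encoding; only the low nibble of (len | type) ever reaches DR7. A minimal user-space sketch of that packing (a standalone illustration, not part of this patch; the constants are copied from the header above):

#include <assert.h>
#include <stdio.h>

/* Constants copied from asm/hw_breakpoint.h above */
#define X86_BREAKPOINT_LEN_4	0x4c
#define X86_BREAKPOINT_WRITE	0x81

int main(void)
{
	/*
	 * The low nibble of (len | type) is what lands in DR7:
	 * bits 0-1 are R/Wn (type), bits 2-3 are LENn (length).
	 * The 0x40/0x80 marker bits are stripped by the "& 0xf"
	 * in encode_dr7() and restored by decode_dr7().
	 */
	unsigned int nibble = (X86_BREAKPOINT_LEN_4 | X86_BREAKPOINT_WRITE) & 0xf;

	assert(nibble == 0xd);	/* LEN = 0b11 (4 bytes), R/W = 0b01 (write) */
	printf("DR7 len/type nibble: 0x%x\n", nibble);
	return 0;
}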
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..6f8ec1c37e0a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
 #include <linux/math64.h>
 #include <linux/init.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg0;
-	unsigned long		debugreg1;
-	unsigned long		debugreg2;
-	unsigned long		debugreg3;
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long		debugreg6;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..752daebe91c6
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,545 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, dr7);
+EXPORT_PER_CPU_SYMBOL(dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
+
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+	unsigned long bp_info;
+
+	bp_info = (len | type) & 0xf;
+	bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+	bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
+				DR_GLOBAL_SLOWDOWN;
+	return bp_info;
+}
+
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
+{
+	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+	*len = (bp_info & 0xc) | 0x40;
+	*type = (bp_info & 0x3) | 0x80;
+
+	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	set_debugreg(info->address, i);
+	__get_cpu_var(cpu_debugreg[i]) = info->address;
+
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 |= encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+
+	return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return;
+
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 &= ~encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+	unsigned int len_in_bytes = 0;
+
+	switch (hbp_len) {
+	case X86_BREAKPOINT_LEN_1:
+		len_in_bytes = 1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		len_in_bytes = 2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		len_in_bytes = 4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		len_in_bytes = 8;
+		break;
+#endif
+	}
+	return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+	unsigned int len;
+
+	len = get_hbp_len(hbp_len);
+
+	return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+	unsigned int len;
+
+	len = get_hbp_len(hbp_len);
+
+	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	/*
+	 * For kernel-addresses, either the address or symbol name can be
+	 * specified.
+	 */
+	if (info->name)
+		info->address = (unsigned long)
+				kallsyms_lookup_name(info->name);
+	if (info->address)
+		return 0;
+
+	return -EINVAL;
+}
+
+int arch_bp_generic_fields(int x86_len, int x86_type,
+			   int *gen_len, int *gen_type)
+{
+	/* Len */
+	switch (x86_len) {
+	case X86_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (x86_type) {
+	case X86_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case X86_BREAKPOINT_WRITE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case X86_BREAKPOINT_RW:
+		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	info->address = bp->attr.bp_addr;
+
+	/* Len */
+	switch (bp->attr.bp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		info->len = X86_BREAKPOINT_LEN_1;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		info->len = X86_BREAKPOINT_LEN_2;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		info->len = X86_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case HW_BREAKPOINT_LEN_8:
+		info->len = X86_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_W:
+		info->type = X86_BREAKPOINT_WRITE;
+		break;
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		info->type = X86_BREAKPOINT_RW;
+		break;
+	case HW_BREAKPOINT_X:
+		info->type = X86_BREAKPOINT_EXECUTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned int align;
+	int ret;
+
+
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+
+	if (info->type == X86_BREAKPOINT_EXECUTE)
+		/*
+		 * Ptrace-refactoring code
+		 * For now, we'll allow instruction breakpoints only for
+		 * user-space addresses
+		 */
+		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+			info->len != X86_BREAKPOINT_EXECUTE)
+			return ret;
+
+	switch (info->len) {
+	case X86_BREAKPOINT_LEN_1:
+		align = 0;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		align = 1;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		align = 3;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		align = 7;
+		break;
+#endif
+	default:
+		return ret;
+	}
+
+	if (bp->callback)
+		ret = arch_store_info(bp);
+
+	if (ret < 0)
+		return ret;
+	/*
+	 * Check that the low-order bits of the address are appropriate
+	 * for the alignment implied by len.
+	 */
+	if (info->address & align)
+		return -EINVAL;
+
+	/* Check that the virtual address is in the proper range */
+	if (tsk) {
+		if (!arch_check_va_in_userspace(info->address, info->len))
+			return -EFAULT;
+	} else {
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump the debug register contents to the user.
+ * We can't dump our per cpu values because they
+ * may contain cpu wide breakpoints, something that
+ * doesn't belong to the current task.
+ *
+ * TODO: include non-ptrace user breakpoints (perf)
+ */
+void aout_dump_debugregs(struct user *dump)
+{
+	int i;
+	int dr7 = 0;
+	struct perf_event *bp;
+	struct arch_hw_breakpoint *info;
+	struct thread_struct *thread = &current->thread;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		bp = thread->ptrace_bps[i];
+
+		if (bp && !bp->attr.disabled) {
+			dump->u_debugreg[i] = bp->attr.bp_addr;
+			info = counter_arch_bp(bp);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		} else {
+			dump->u_debugreg[i] = 0;
+		}
+	}
+
+	dump->u_debugreg[4] = 0;
+	dump->u_debugreg[5] = 0;
+	dump->u_debugreg[6] = current->thread.debugreg6;
+
+	dump->u_debugreg[7] = dr7;
+}
+EXPORT_SYMBOL_GPL(aout_dump_debugregs);
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		unregister_hw_breakpoint(t->ptrace_bps[i]);
+		t->ptrace_bps[i] = NULL;
+	}
+}
+
+void hw_breakpoint_restore(void)
+{
+	set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+	set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+	set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+	set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+	set_debugreg(current->thread.debugreg6, 6);
+	set_debugreg(__get_cpu_var(dr7), 7);
+}
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
+ *
+ * NOTIFY_DONE returned if one of the following conditions is true.
+ * i) When the causative address is from user-space and the exception
+ * is a valid one, i.e. not triggered as a result of lazy debug register
+ * switching
+ * ii) When there are more bits than trap<n> set in DR6 register (such
+ * as BD, BS or BT) indicating that more than one debug condition is
+ * met and requires some more action in do_debug().
+ *
+ * NOTIFY_STOP returned for all other cases
+ *
+ */
+static int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+	int i, cpu, rc = NOTIFY_STOP;
+	struct perf_event *bp;
+	unsigned long dr7, dr6;
+	unsigned long *dr6_p;
+
+	/* The DR6 value is pointed to by args->err */
+	dr6_p = (unsigned long *)ERR_PTR(args->err);
+	dr6 = *dr6_p;
+
+	/* Do an early return if no trap bits are set in DR6 */
+	if ((dr6 & DR_TRAP_BITS) == 0)
+		return NOTIFY_DONE;
+
+	get_debugreg(dr7, 7);
+	/* Disable breakpoints during exception handling */
+	set_debugreg(0UL, 7);
+	/*
+	 * Assert that local interrupts are disabled
+	 * Reset the DRn bits in the virtualized register value.
+	 * The ptrace trigger routine will add in whatever is needed.
+	 */
+	current->thread.debugreg6 &= ~DR_TRAP_BITS;
+	cpu = get_cpu();
+
+	/* Handle all the breakpoints that were triggered */
+	for (i = 0; i < HBP_NUM; ++i) {
+		if (likely(!(dr6 & (DR_TRAP0 << i))))
+			continue;
+
+		/*
+		 * The counter may be concurrently released but that can only
+		 * occur from a call_rcu() path. We can then safely fetch
+		 * the breakpoint, use its callback, touch its counter
+		 * while we are in an rcu_read_lock() path.
+		 */
+		rcu_read_lock();
+
+		bp = per_cpu(bp_per_reg[i], cpu);
+		if (bp)
+			rc = NOTIFY_DONE;
+		/*
+		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
+		 * exception handling
+		 */
+		(*dr6_p) &= ~(DR_TRAP0 << i);
+		/*
+		 * bp can be NULL due to lazy debug register switching
+		 * or due to concurrent perf counter removing.
+		 */
+		if (!bp) {
+			rcu_read_unlock();
+			break;
+		}
+
+		(bp->callback)(bp, args->regs);
+
+		rcu_read_unlock();
+	}
+	if (dr6 & (~DR_TRAP_BITS))
+		rc = NOTIFY_DONE;
+
+	set_debugreg(dr7, 7);
+	put_cpu();
+
+	return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+		struct notifier_block *unused, unsigned long val, void *data)
+{
+	if (val != DIE_DEBUG)
+		return NOTIFY_DONE;
+
+	return hw_breakpoint_handler(data);
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+	/* TODO */
+}
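The DR7 helpers above are self-contained enough to exercise outside the kernel. The following standalone sketch mirrors encode_dr7()/decode_dr7() from the new file and round-trips a 4-byte write watchpoint in slot 2; the DR_CONTROL_SHIFT/DR_CONTROL_SIZE values are the ones debugreg.h defines outside the hunks quoted above:

#include <assert.h>
#include <stdio.h>

/* DR7 layout constants, mirroring asm/debugreg.h */
#define DR_CONTROL_SHIFT	16
#define DR_CONTROL_SIZE		4
#define DR_ENABLE_SIZE		2
#define DR_GLOBAL_ENABLE	0x2
#define DR_GLOBAL_SLOWDOWN	0x200

#define X86_BREAKPOINT_LEN_4	0x4c
#define X86_BREAKPOINT_WRITE	0x81

/* Same logic as encode_dr7()/decode_dr7() in the patch */
static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
	unsigned long bp_info = (len | type) & 0xf;

	bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
	bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
		   DR_GLOBAL_SLOWDOWN;
	return bp_info;
}

static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

	*len = (bp_info & 0xc) | 0x40;
	*type = (bp_info & 0x3) | 0x80;

	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

int main(void)
{
	unsigned len, type;
	/* 4-byte write watchpoint in slot 2 */
	unsigned long dr7 = encode_dr7(2, X86_BREAKPOINT_LEN_4,
				       X86_BREAKPOINT_WRITE);

	assert(decode_dr7(dr7, 2, &len, &type) & DR_GLOBAL_ENABLE);
	assert(len == X86_BREAKPOINT_LEN_4);
	assert(type == X86_BREAKPOINT_WRITE);
	printf("dr7 = %#lx\n", dr7);	/* 0xd000220 */
	return 0;
}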
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
 #include <linux/smp.h>
 #include <linux/nmi.h>
 
+#include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/system.h>
 
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
 			"resuming...\n");
 	kgdb_arch_handle_exception(args->trapnr, args->signr,
 				   args->err, "c", "", regs);
+	/*
+	 * Reset the BS bit in dr6 (pointed to by args->err) to
+	 * denote completion of processing
+	 */
+	(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 
 	return NOTIFY_STOP;
 }
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c5f1f117e0c0..3fe86d706a14 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -56,6 +56,7 @@
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
+#include <asm/debugreg.h>
 
 void jprobe_return_end(void);
 
@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 		ret = NOTIFY_STOP;
 		break;
 	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs))
+		if (post_kprobe_handler(args->regs)) {
+			/*
+			 * Reset the BS bit in dr6 (pointed to by args->err)
+			 * to denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 			ret = NOTIFY_STOP;
+		}
 		break;
 	case DIE_GPF:
 		/*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 #include <asm/cacheflush.h>
+#include <asm/debugreg.h>
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/debugreg.h>
 
 static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
 				unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -17,6 +18,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
 }
 #endif
 
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cdab..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
+	err = -ENOMEM;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
 						  IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa490..70cf15873f3d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	p->thread.fs = me->thread.fs;
 	p->thread.gs = me->thread.gs;
+	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
 	savesegment(fs, p->thread.fsindex);
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 
+	err = -ENOMEM;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+
 	return err;
 }
 
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (preload_fpu)
 		__math_state_restore();
+
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4f76d275ee4..b25f8947ed7a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -34,6 +36,7 @@
 #include <asm/prctl.h>
 #include <asm/proto.h>
 #include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
 
 #include "tls.h"
 
@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-	return TASK_SIZE - 3;
-}
-
 #else /* CONFIG_X86_64 */
 
 #define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(task, TIF_IA32))
-		return IA32_PAGE_OFFSET - 3;
-#endif
-	return TASK_SIZE_MAX - 7;
-}
-
 #endif /* CONFIG_X86_32 */
 
 static unsigned long get_flags(struct task_struct *task)
@@ -566,99 +555,229 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
+static void ptrace_triggered(struct perf_event *bp, void *data)
+{
+	int i;
+	struct thread_struct *thread = &(current->thread);
+
+	/*
+	 * Store in the virtual DR6 register the fact that the breakpoint
+	 * was hit so the thread's debugger will see it.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp)
+			break;
+	}
+
+	thread->debugreg6 |= (DR_TRAP0 << i);
+}
+
 /*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Walk through every ptrace breakpoint for this thread and
+ * build the dr7 value on top of their attributes.
+ *
  */
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 {
-	switch (n) {
-	case 0:		return child->thread.debugreg0;
-	case 1:		return child->thread.debugreg1;
-	case 2:		return child->thread.debugreg2;
-	case 3:		return child->thread.debugreg3;
-	case 6:		return child->thread.debugreg6;
-	case 7:		return child->thread.debugreg7;
+	int i;
+	int dr7 = 0;
+	struct arch_hw_breakpoint *info;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i] && !bp[i]->attr.disabled) {
+			info = counter_arch_bp(bp[i]);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		}
 	}
-	return 0;
+
+	return dr7;
 }
 
-static int ptrace_set_debugreg(struct task_struct *child,
-			       int n, unsigned long data)
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
-	int i;
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long old_dr7;
+	int i, orig_ret = 0, rc = 0;
+	int enabled, second_pass = 0;
+	unsigned len, type;
+	int gen_len, gen_type;
+	struct perf_event *bp;
+
+	data &= ~DR_CONTROL_RESERVED;
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+restore:
+	/*
+	 * Loop through all the hardware breakpoints, making the
+	 * appropriate changes to each.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		enabled = decode_dr7(data, i, &len, &type);
+		bp = thread->ptrace_bps[i];
+
+		if (!enabled) {
+			if (bp) {
+				/*
+				 * Don't unregister the breakpoints right away,
+				 * unless all register_user_hw_breakpoint()
+				 * requests have succeeded. This prevents
+				 * any window of opportunity for debug
+				 * register grabbing by other users.
+				 */
+				if (!second_pass)
+					continue;
+				thread->ptrace_bps[i] = NULL;
+				unregister_hw_breakpoint(bp);
+			}
+			continue;
+		}
 
-	if (unlikely(n == 4 || n == 5))
-		return -EIO;
+		/*
+		 * We should have at least an inactive breakpoint at this
+		 * slot. It means the user is writing dr7 without having
+		 * written the address register first
+		 */
+		if (!bp) {
+			rc = -EINVAL;
+			break;
+		}
 
-	if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
-		return -EIO;
+		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+		if (rc)
+			break;
 
-	switch (n) {
-	case 0:		child->thread.debugreg0 = data; break;
-	case 1:		child->thread.debugreg1 = data; break;
-	case 2:		child->thread.debugreg2 = data; break;
-	case 3:		child->thread.debugreg3 = data; break;
+		/*
+		 * This is a temporary thing as bp is unregistered/registered
+		 * to simulate modification
+		 */
+		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+					       gen_type, bp->callback,
+					       tsk, true);
+		thread->ptrace_bps[i] = NULL;
 
-	case 6:
-		if ((data & ~0xffffffffUL) != 0)
-			return -EIO;
-		child->thread.debugreg6 = data;
-		break;
+		if (!bp) { /* incorrect bp, or we have a bug in bp API */
+			rc = -EINVAL;
+			break;
+		}
+		if (IS_ERR(bp)) {
+			rc = PTR_ERR(bp);
+			bp = NULL;
+			break;
+		}
+		thread->ptrace_bps[i] = bp;
+	}
+	/*
+	 * Make a second pass to free the remaining unused breakpoints
+	 * or to restore the original breakpoints if an error occurred.
+	 */
+	if (!second_pass) {
+		second_pass = 1;
+		if (rc < 0) {
+			orig_ret = rc;
+			data = old_dr7;
+		}
+		goto restore;
+	}
+	return ((orig_ret < 0) ? orig_ret : rc);
+}
 
-	case 7:
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long val = 0;
+
+	if (n < HBP_NUM) {
+		struct perf_event *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->hw.info.address;
+	} else if (n == 6) {
+		val = thread->debugreg6;
+	} else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
+	return val;
+}
+
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+				      unsigned long addr)
+{
+	struct perf_event *bp;
+	struct thread_struct *t = &tsk->thread;
+
+	if (!t->ptrace_bps[nr]) {
 		/*
-		 * Sanity-check data. Take one half-byte at once with
-		 * check = (val >> (16 + 4*i)) & 0xf. It contains the
-		 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
-		 * 2 and 3 are LENi. Given a list of invalid values,
-		 * we do mask |= 1 << invalid_value, so that
-		 * (mask >> check) & 1 is a correct test for invalid
-		 * values.
-		 *
-		 * R/Wi contains the type of the breakpoint /
-		 * watchpoint, LENi contains the length of the watched
-		 * data in the watchpoint case.
-		 *
-		 * The invalid values are:
-		 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
-		 * - R/Wi == 0x10 (break on I/O reads or writes), so
-		 *   mask |= 0x4444.
-		 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
-		 *   0x1110.
-		 *
-		 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
-		 *
-		 * See the Intel Manual "System Programming Guide",
-		 * 15.2.4
-		 *
-		 * Note that LENi == 0x10 is defined on x86_64 in long
-		 * mode (i.e. even for 32-bit userspace software, but
-		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
-		 * See the AMD manual no. 24593 (AMD64 System Programming)
+		 * Put stub len and type to register (reserve) an inactive but
+		 * correct bp
 		 */
-#ifdef CONFIG_X86_32
-#define DR7_MASK	0x5f54
-#else
-#define DR7_MASK	0x5554
-#endif
-		data &= ~DR_CONTROL_RESERVED;
-		for (i = 0; i < 4; i++)
-			if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-				return -EIO;
-		child->thread.debugreg7 = data;
-		if (data)
-			set_tsk_thread_flag(child, TIF_DEBUG);
-		else
-			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break;
+		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+						 HW_BREAKPOINT_W,
+						 ptrace_triggered, tsk,
+						 false);
+	} else {
+		bp = t->ptrace_bps[nr];
+		t->ptrace_bps[nr] = NULL;
+		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+					       bp->attr.bp_type,
+					       bp->callback,
+					       tsk,
+					       bp->attr.disabled);
 	}
 
+	if (!bp)
+		return -EIO;
+	/*
+	 * CHECKME: the previous code returned -EIO if the addr wasn't a
+	 * valid task virtual addr. The new one will return -EINVAL in this
+	 * case.
+	 * -EINVAL may be what we want for in-kernel breakpoints users, but
+	 * -EIO looks better for ptrace, since we refuse a register writing
+	 * for the user. And anyway this is the previous behaviour.
+	 */
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	t->ptrace_bps[nr] = bp;
+
 	return 0;
 }
 
 /*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	int rc = 0;
+
+	/* There are no DR4 or DR5 registers */
+	if (n == 4 || n == 5)
+		return -EIO;
+
+	if (n == 6) {
+		thread->debugreg6 = val;
+		goto ret_path;
+	}
+	if (n < HBP_NUM) {
+		rc = ptrace_set_breakpoint_addr(tsk, n, val);
+		if (rc)
+			return rc;
+	}
+	/* All that's left is DR7 */
+	if (n == 7)
+		rc = ptrace_write_dr7(tsk, val);
+
+ret_path:
+	return rc;
+}
+
+/*
  * These access the current or another (stopped) task's io permission
  * bitmap for debugging or core dump.
  */
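From user space, the rewritten paths are exercised through the existing PTRACE_PEEKUSER/PTRACE_POKEUSER interface on u_debugreg[]. A hedged sketch of a debugger setting a write watchpoint on a traced child follows (illustrative only, not from the patch); note that the address register must be poked before dr7, since ptrace_write_dr7() above returns -EINVAL for an enable bit whose slot has no breakpoint reserved:

#include <stdio.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>

static volatile int watched;

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		watched = 42;		/* should trigger the watchpoint */
		_exit(0);
	}

	waitpid(pid, &status, 0);

	/* DR0 = address to watch (must come before the dr7 write) */
	ptrace(PTRACE_POKEUSER, pid,
	       offsetof(struct user, u_debugreg[0]), &watched);
	/*
	 * DR7: local-enable slot 0 (bit 0), R/W0 = 0b01 (write),
	 * LEN0 = 0b11 (4 bytes) -> nibble 0xd in bits 16-19.
	 */
	ptrace(PTRACE_POKEUSER, pid,
	       offsetof(struct user, u_debugreg[7]), (void *)0xd0001);

	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, &status, 0);	/* child stops with SIGTRAP */

	/* Reads the virtualized DR6 (thread.debugreg6), B0 should be set */
	long dr6 = ptrace(PTRACE_PEEKUSER, pid,
			  offsetof(struct user, u_debugreg[6]), NULL);
	printf("stopped, dr6 = %#lx (B0 set: %ld)\n", dr6, dr6 & 1);

	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, &status, 0);
	return 0;
}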
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..fbf3b07c8567 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		/*
-		 * Re-enable any watchpoints before delivering the
-		 * signal to user space. The processor register will
-		 * have been cleared if the watchpoint triggered
-		 * inside the kernel.
-		 */
-		if (current->thread.debugreg7)
-			set_debugreg(current->thread.debugreg7, 7);
-
 		/* Whee! Actually deliver the signal. */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
 			/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc3..33399176512a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long condition;
+	unsigned long dr6;
 	int si_code;
 
-	get_debugreg(condition, 6);
+	get_debugreg(dr6, 6);
 
 	/* Catch kmemcheck conditions first of all! */
-	if (condition & DR_STEP && kmemcheck_trap(regs))
+	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;
 
+	/* DR6 may or may not be cleared by the CPU */
+	set_debugreg(0, 6);
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
 	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 	tsk->thread.debugctlmsr = 0;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-						SIGTRAP) == NOTIFY_STOP)
+	/* Store the virtualized DR6 value */
+	tsk->thread.debugreg6 = dr6;
+
+	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+							SIGTRAP) == NOTIFY_STOP)
 		return;
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
-	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7)
-			goto clear_dr7;
+	if (regs->flags & X86_VM_MASK) {
+		handle_vm86_trap((struct kernel_vm86_regs *) regs,
+				error_code, 1);
+		return;
 	}
 
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto debug_vm86;
-#endif
-
-	/* Save debug status register where ptrace can see it */
-	tsk->thread.debugreg6 = condition;
-
 	/*
-	 * Single-stepping through TF: make sure we ignore any events in
-	 * kernel space (but re-enable TF when returning to user mode).
+	 * Single-stepping through system calls: ignore any exceptions in
+	 * kernel space, but re-enable TF when returning to user mode.
+	 *
+	 * We already checked v86 mode above, so we can check for kernel mode
+	 * by just checking the CPL of CS.
 	 */
-	if (condition & DR_STEP) {
-		if (!user_mode(regs))
-			goto clear_TF_reenable;
+	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+		tsk->thread.debugreg6 &= ~DR_STEP;
+		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+		regs->flags &= ~X86_EFLAGS_TF;
 	}
-
-	si_code = get_si_code(condition);
-	/* Ok, finally something we can handle */
-	send_sigtrap(tsk, regs, error_code, si_code);
-
-	/*
-	 * Disable additional traps. They'll be re-enabled when
-	 * the signal is delivered.
-	 */
-clear_dr7:
-	set_debugreg(0, 7);
+	si_code = get_si_code(tsk->thread.debugreg6);
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
-	return;
 
-#ifdef CONFIG_X86_32
-debug_vm86:
-	/* reenable preemption: handle_vm86_trap() might sleep */
-	dec_preempt_count();
-	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	conditional_cli(regs);
-	return;
-#endif
-
-clear_TF_reenable:
-	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->flags &= ~X86_EFLAGS_TF;
-	preempt_conditional_cli(regs);
 	return;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527c..4fc80174191c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #define CREATE_TRACE_POINTS | 42 | #define CREATE_TRACE_POINTS |
43 | #include "trace.h" | 43 | #include "trace.h" |
44 | 44 | ||
45 | #include <asm/debugreg.h> | ||
45 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
46 | #include <asm/msr.h> | 47 | #include <asm/msr.h> |
47 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3643 | trace_kvm_entry(vcpu->vcpu_id); | 3644 | trace_kvm_entry(vcpu->vcpu_id); |
3644 | kvm_x86_ops->run(vcpu, kvm_run); | 3645 | kvm_x86_ops->run(vcpu, kvm_run); |
3645 | 3646 | ||
3646 | if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { | 3647 | /* |
3647 | set_debugreg(current->thread.debugreg0, 0); | 3648 | * If the guest has used debug registers, at least dr7 |
3648 | set_debugreg(current->thread.debugreg1, 1); | 3649 | * will be disabled while returning to the host. |
3649 | set_debugreg(current->thread.debugreg2, 2); | 3650 | * If we don't have active breakpoints in the host, we don't |
3650 | set_debugreg(current->thread.debugreg3, 3); | 3651 | * care about the messed up debug address registers. But if |
3651 | set_debugreg(current->thread.debugreg6, 6); | 3652 | * we have some of them active, restore the old state. |
3652 | set_debugreg(current->thread.debugreg7, 7); | 3653 | */ |
3653 | } | 3654 | if (hw_breakpoint_active()) |
3655 | hw_breakpoint_restore(); | ||
3654 | 3656 | ||
3655 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 3657 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
3656 | local_irq_enable(); | 3658 | local_irq_enable(); |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..11a4ad4d6253 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | |||
540 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
541 | 541 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | 543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) { |
544 | /* | ||
545 | * Reset the BS bit in dr6 (pointed to by args->err) to | ||
546 | * denote completion of processing. | ||
547 | */ | ||
548 | (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; | ||
544 | return NOTIFY_STOP; | 549 | return NOTIFY_STOP; |
550 | } | ||
545 | 551 | ||
546 | return NOTIFY_DONE; | 552 | return NOTIFY_DONE; |
547 | } | 553 | } |
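The hunk above is the first user of a new convention set up in do_debug(): the handler's on-stack dr6 is handed to DIE_DEBUG notifiers through the die_args err field (encoded with PTR_ERR(), recovered with ERR_PTR()), so a notifier that fully consumes a condition can clear the matching bit in the virtualized dr6. A minimal sketch of a notifier following that convention (the function name is illustrative, not part of the patch):

	#include <linux/notifier.h>
	#include <linux/kdebug.h>
	#include <linux/err.h>
	#include <asm/debugreg.h>

	static int my_debug_notify(struct notifier_block *nb, unsigned long val,
				   void *args)
	{
		struct die_args *arg = args;
		unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

		if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) {
			*dr6_p &= ~DR_STEP;	/* tell do_debug() the step was consumed */
			return NOTIFY_STOP;
		}
		return NOTIFY_DONE;
	}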
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667e..0a979f3e5b8a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mce.h> | 18 | #include <asm/mce.h> |
19 | #include <asm/xcr.h> | 19 | #include <asm/xcr.h> |
20 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_X86_32 | 23 | #ifdef CONFIG_X86_32 |
23 | static struct saved_context saved_context; | 24 | static struct saved_context saved_context; |
@@ -142,31 +143,6 @@ static void fix_processor_context(void) | |||
142 | #endif | 143 | #endif |
143 | load_TR_desc(); /* This does ltr */ | 144 | load_TR_desc(); /* This does ltr */ |
144 | load_LDT(¤t->active_mm->context); /* This does lldt */ | 145 | load_LDT(¤t->active_mm->context); /* This does lldt */ |
145 | |||
146 | /* | ||
147 | * Now maybe reload the debug registers | ||
148 | */ | ||
149 | if (current->thread.debugreg7) { | ||
150 | #ifdef CONFIG_X86_32 | ||
151 | set_debugreg(current->thread.debugreg0, 0); | ||
152 | set_debugreg(current->thread.debugreg1, 1); | ||
153 | set_debugreg(current->thread.debugreg2, 2); | ||
154 | set_debugreg(current->thread.debugreg3, 3); | ||
155 | /* no 4 and 5 */ | ||
156 | set_debugreg(current->thread.debugreg6, 6); | ||
157 | set_debugreg(current->thread.debugreg7, 7); | ||
158 | #else | ||
159 | /* CONFIG_X86_64 */ | ||
160 | loaddebug(¤t->thread, 0); | ||
161 | loaddebug(¤t->thread, 1); | ||
162 | loaddebug(¤t->thread, 2); | ||
163 | loaddebug(¤t->thread, 3); | ||
164 | /* no 4 and 5 */ | ||
165 | loaddebug(¤t->thread, 6); | ||
166 | loaddebug(¤t->thread, 7); | ||
167 | #endif | ||
168 | } | ||
169 | |||
170 | } | 146 | } |
171 | 147 | ||
172 | /** | 148 | /** |
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 000000000000..0b98cbf76da7 --- /dev/null +++ b/include/linux/hw_breakpoint.h | |||
@@ -0,0 +1,137 @@ | |||
1 | #ifndef _LINUX_HW_BREAKPOINT_H | ||
2 | #define _LINUX_HW_BREAKPOINT_H | ||
3 | |||
4 | #include <linux/perf_event.h> | ||
5 | |||
6 | enum { | ||
7 | HW_BREAKPOINT_LEN_1 = 1, | ||
8 | HW_BREAKPOINT_LEN_2 = 2, | ||
9 | HW_BREAKPOINT_LEN_4 = 4, | ||
10 | HW_BREAKPOINT_LEN_8 = 8, | ||
11 | }; | ||
12 | |||
13 | enum { | ||
14 | HW_BREAKPOINT_R = 1, | ||
15 | HW_BREAKPOINT_W = 2, | ||
16 | HW_BREAKPOINT_X = 4, | ||
17 | }; | ||
18 | |||
19 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
20 | |||
21 | static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) | ||
22 | { | ||
23 | return bp->attr.bp_addr; | ||
24 | } | ||
25 | |||
26 | static inline int hw_breakpoint_type(struct perf_event *bp) | ||
27 | { | ||
28 | return bp->attr.bp_type; | ||
29 | } | ||
30 | |||
31 | static inline int hw_breakpoint_len(struct perf_event *bp) | ||
32 | { | ||
33 | return bp->attr.bp_len; | ||
34 | } | ||
35 | |||
36 | extern struct perf_event * | ||
37 | register_user_hw_breakpoint(unsigned long addr, | ||
38 | int len, | ||
39 | int type, | ||
40 | perf_callback_t triggered, | ||
41 | struct task_struct *tsk, | ||
42 | bool active); | ||
43 | |||
44 | /* FIXME: modify only from the attr, and don't unregister */ | ||
45 | extern struct perf_event * | ||
46 | modify_user_hw_breakpoint(struct perf_event *bp, | ||
47 | unsigned long addr, | ||
48 | int len, | ||
49 | int type, | ||
50 | perf_callback_t triggered, | ||
51 | struct task_struct *tsk, | ||
52 | bool active); | ||
53 | |||
54 | /* | ||
55 | * Kernel breakpoints are not associated with any particular thread. | ||
56 | */ | ||
57 | extern struct perf_event * | ||
58 | register_wide_hw_breakpoint_cpu(unsigned long addr, | ||
59 | int len, | ||
60 | int type, | ||
61 | perf_callback_t triggered, | ||
62 | int cpu, | ||
63 | bool active); | ||
64 | |||
65 | extern struct perf_event ** | ||
66 | register_wide_hw_breakpoint(unsigned long addr, | ||
67 | int len, | ||
68 | int type, | ||
69 | perf_callback_t triggered, | ||
70 | bool active); | ||
71 | |||
72 | extern int register_perf_hw_breakpoint(struct perf_event *bp); | ||
73 | extern int __register_perf_hw_breakpoint(struct perf_event *bp); | ||
74 | extern void unregister_hw_breakpoint(struct perf_event *bp); | ||
75 | extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); | ||
76 | |||
77 | extern int reserve_bp_slot(struct perf_event *bp); | ||
78 | extern void release_bp_slot(struct perf_event *bp); | ||
79 | |||
80 | extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); | ||
81 | |||
82 | static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) | ||
83 | { | ||
84 | return &bp->hw.info; | ||
85 | } | ||
86 | |||
87 | #else /* !CONFIG_HAVE_HW_BREAKPOINT */ | ||
88 | |||
89 | static inline struct perf_event * | ||
90 | register_user_hw_breakpoint(unsigned long addr, | ||
91 | int len, | ||
92 | int type, | ||
93 | perf_callback_t triggered, | ||
94 | struct task_struct *tsk, | ||
95 | bool active) { return NULL; } | ||
96 | static inline struct perf_event * | ||
97 | modify_user_hw_breakpoint(struct perf_event *bp, | ||
98 | unsigned long addr, | ||
99 | int len, | ||
100 | int type, | ||
101 | perf_callback_t triggered, | ||
102 | struct task_struct *tsk, | ||
103 | bool active) { return NULL; } | ||
104 | static inline struct perf_event * | ||
105 | register_wide_hw_breakpoint_cpu(unsigned long addr, | ||
106 | int len, | ||
107 | int type, | ||
108 | perf_callback_t triggered, | ||
109 | int cpu, | ||
110 | bool active) { return NULL; } | ||
111 | static inline struct perf_event ** | ||
112 | register_wide_hw_breakpoint(unsigned long addr, | ||
113 | int len, | ||
114 | int type, | ||
115 | perf_callback_t triggered, | ||
116 | bool active) { return NULL; } | ||
117 | static inline int | ||
118 | register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } | ||
119 | static inline int | ||
120 | __register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } | ||
121 | static inline void unregister_hw_breakpoint(struct perf_event *bp) { } | ||
122 | static inline void | ||
123 | unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } | ||
124 | static inline int | ||
125 | reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } | ||
126 | static inline void release_bp_slot(struct perf_event *bp) { } | ||
127 | |||
128 | static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } | ||
129 | |||
130 | static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) | ||
131 | { | ||
132 | return NULL; | ||
133 | } | ||
134 | |||
135 | #endif /* CONFIG_HAVE_HW_BREAKPOINT */ | ||
136 | |||
137 | #endif /* _LINUX_HW_BREAKPOINT_H */ | ||
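To see how the interface above fits together, here is a minimal module-style sketch (not part of this patch) that sets a 4-byte read/write watchpoint on a kernel variable through the wide-breakpoint API; the watched symbol is an illustrative assumption, and kallsyms_lookup_name() relies on the export added elsewhere in this merge:

	#include <linux/module.h>
	#include <linux/kallsyms.h>
	#include <linux/err.h>
	#include <linux/hw_breakpoint.h>

	static struct perf_event **wp;

	static void wp_triggered(struct perf_event *bp, void *data)
	{
		printk(KERN_INFO "symbol at 0x%lx was accessed\n",
		       hw_breakpoint_addr(bp));
	}

	static int __init wp_init(void)
	{
		unsigned long addr = kallsyms_lookup_name("pid_max"); /* assumed symbol */

		if (!addr)
			return -ENOENT;

		wp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
						 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
						 wp_triggered, true);
		return IS_ERR(wp) ? PTR_ERR(wp) : 0;
	}

	static void __exit wp_exit(void)
	{
		unregister_wide_hw_breakpoint(wp);
	}

	module_init(wp_init);
	module_exit(wp_exit);
	MODULE_LICENSE("GPL");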
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7f87563c8485..b5cdac0de370 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -18,6 +18,10 @@ | |||
18 | #include <linux/ioctl.h> | 18 | #include <linux/ioctl.h> |
19 | #include <asm/byteorder.h> | 19 | #include <asm/byteorder.h> |
20 | 20 | ||
21 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
22 | #include <asm/hw_breakpoint.h> | ||
23 | #endif | ||
24 | |||
21 | /* | 25 | /* |
22 | * User-space ABI bits: | 26 | * User-space ABI bits: |
23 | */ | 27 | */ |
@@ -31,6 +35,7 @@ enum perf_type_id { | |||
31 | PERF_TYPE_TRACEPOINT = 2, | 35 | PERF_TYPE_TRACEPOINT = 2, |
32 | PERF_TYPE_HW_CACHE = 3, | 36 | PERF_TYPE_HW_CACHE = 3, |
33 | PERF_TYPE_RAW = 4, | 37 | PERF_TYPE_RAW = 4, |
38 | PERF_TYPE_BREAKPOINT = 5, | ||
34 | 39 | ||
35 | PERF_TYPE_MAX, /* non-ABI */ | 40 | PERF_TYPE_MAX, /* non-ABI */ |
36 | }; | 41 | }; |
@@ -209,6 +214,15 @@ struct perf_event_attr { | |||
209 | __u32 wakeup_events; /* wakeup every n events */ | 214 | __u32 wakeup_events; /* wakeup every n events */ |
210 | __u32 wakeup_watermark; /* bytes before wakeup */ | 215 | __u32 wakeup_watermark; /* bytes before wakeup */ |
211 | }; | 216 | }; |
217 | |||
218 | union { | ||
219 | struct { /* Hardware breakpoint info */ | ||
220 | __u64 bp_addr; | ||
221 | __u32 bp_type; | ||
222 | __u32 bp_len; | ||
223 | }; | ||
224 | }; | ||
225 | |||
212 | __u32 __reserved_2; | 226 | __u32 __reserved_2; |
213 | 227 | ||
214 | __u64 __reserved_3; | 228 | __u64 __reserved_3; |
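With these fields in place, a breakpoint counter can be requested through the ordinary perf_event_open() path. A hedged sketch of filling the attribute from user space (the HW_BREAKPOINT_* constants live in the new kernel header and are not yet exported ABI, so this snippet assumes local copies of them; watch_addr is likewise assumed):

	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type    = PERF_TYPE_BREAKPOINT;
	attr.size    = sizeof(attr);
	attr.bp_addr = (__u64)(unsigned long)watch_addr; /* address to watch */
	attr.bp_type = HW_BREAKPOINT_W;                  /* break on writes */
	attr.bp_len  = HW_BREAKPOINT_LEN_4;              /* 4-byte watchpoint */
	/* fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0); */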
@@ -478,6 +492,11 @@ struct hw_perf_event { | |||
478 | s64 remaining; | 492 | s64 remaining; |
479 | struct hrtimer hrtimer; | 493 | struct hrtimer hrtimer; |
480 | }; | 494 | }; |
495 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
496 | union { /* breakpoint */ | ||
497 | struct arch_hw_breakpoint info; | ||
498 | }; | ||
499 | #endif | ||
481 | }; | 500 | }; |
482 | atomic64_t prev_count; | 501 | atomic64_t prev_count; |
483 | u64 sample_period; | 502 | u64 sample_period; |
@@ -546,6 +565,8 @@ struct perf_pending_entry { | |||
546 | void (*func)(struct perf_pending_entry *); | 565 | void (*func)(struct perf_pending_entry *); |
547 | }; | 566 | }; |
548 | 567 | ||
568 | typedef void (*perf_callback_t)(struct perf_event *, void *); | ||
569 | |||
549 | /** | 570 | /** |
550 | * struct perf_event - performance event kernel representation: | 571 | * struct perf_event - performance event kernel representation: |
551 | */ | 572 | */ |
@@ -588,7 +609,7 @@ struct perf_event { | |||
588 | u64 tstamp_running; | 609 | u64 tstamp_running; |
589 | u64 tstamp_stopped; | 610 | u64 tstamp_stopped; |
590 | 611 | ||
591 | struct perf_event_attr attr; | 612 | struct perf_event_attr attr; |
592 | struct hw_perf_event hw; | 613 | struct hw_perf_event hw; |
593 | 614 | ||
594 | struct perf_event_context *ctx; | 615 | struct perf_event_context *ctx; |
@@ -641,6 +662,10 @@ struct perf_event { | |||
641 | struct event_filter *filter; | 662 | struct event_filter *filter; |
642 | #endif | 663 | #endif |
643 | 664 | ||
665 | perf_callback_t callback; | ||
666 | |||
667 | perf_callback_t event_callback; | ||
668 | |||
644 | #endif /* CONFIG_PERF_EVENTS */ | 669 | #endif /* CONFIG_PERF_EVENTS */ |
645 | }; | 670 | }; |
646 | 671 | ||
@@ -745,6 +770,13 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, | |||
745 | struct perf_cpu_context *cpuctx, | 770 | struct perf_cpu_context *cpuctx, |
746 | struct perf_event_context *ctx, int cpu); | 771 | struct perf_event_context *ctx, int cpu); |
747 | extern void perf_event_update_userpage(struct perf_event *event); | 772 | extern void perf_event_update_userpage(struct perf_event *event); |
773 | extern int perf_event_release_kernel(struct perf_event *event); | ||
774 | extern struct perf_event * | ||
775 | perf_event_create_kernel_counter(struct perf_event_attr *attr, | ||
776 | int cpu, | ||
777 | pid_t pid, | ||
778 | perf_callback_t callback); | ||
779 | extern u64 perf_event_read_value(struct perf_event *event); | ||
748 | 780 | ||
749 | struct perf_sample_data { | 781 | struct perf_sample_data { |
750 | u64 type; | 782 | u64 type; |
@@ -821,6 +853,7 @@ extern int sysctl_perf_event_sample_rate; | |||
821 | extern void perf_event_init(void); | 853 | extern void perf_event_init(void); |
822 | extern void perf_tp_event(int event_id, u64 addr, u64 count, | 854 | extern void perf_tp_event(int event_id, u64 addr, u64 count, |
823 | void *record, int entry_size); | 855 | void *record, int entry_size); |
856 | extern void perf_bp_event(struct perf_event *event, void *data); | ||
824 | 857 | ||
825 | #ifndef perf_misc_flags | 858 | #ifndef perf_misc_flags |
826 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ | 859 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ |
@@ -855,6 +888,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } | |||
855 | static inline void | 888 | static inline void |
856 | perf_sw_event(u32 event_id, u64 nr, int nmi, | 889 | perf_sw_event(u32 event_id, u64 nr, int nmi, |
857 | struct pt_regs *regs, u64 addr) { } | 890 | struct pt_regs *regs, u64 addr) { } |
891 | static inline void | ||
892 | perf_bp_event(struct perf_event *event, void *data) { } | ||
858 | 893 | ||
859 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } | 894 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } |
860 | static inline void perf_event_comm(struct task_struct *tsk) { } | 895 | static inline void perf_event_comm(struct task_struct *tsk) { } |
diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8ac0b9..17b575ec7d07 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ | |||
95 | obj-$(CONFIG_SMP) += sched_cpupri.o | 95 | obj-$(CONFIG_SMP) += sched_cpupri.o |
96 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 96 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
97 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 97 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
98 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | ||
98 | 99 | ||
99 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 100 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
100 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 101 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac2ecc1..3f45e3cf931d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/init_task.h> | 49 | #include <linux/init_task.h> |
50 | #include <linux/perf_event.h> | 50 | #include <linux/perf_event.h> |
51 | #include <trace/events/sched.h> | 51 | #include <trace/events/sched.h> |
52 | #include <linux/hw_breakpoint.h> | ||
52 | 53 | ||
53 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
54 | #include <asm/unistd.h> | 55 | #include <asm/unistd.h> |
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code) | |||
978 | proc_exit_connector(tsk); | 979 | proc_exit_connector(tsk); |
979 | 980 | ||
980 | /* | 981 | /* |
982 | * FIXME: do that only when needed, using sched_exit tracepoint | ||
983 | */ | ||
984 | flush_ptrace_hw_breakpoint(tsk); | ||
985 | /* | ||
981 | * Flush inherited counters to the parent - before the parent | 986 | * Flush inherited counters to the parent - before the parent |
982 | * gets woken up by child-exit notifications. | 987 | * gets woken up by child-exit notifications. |
983 | */ | 988 | */ |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..9ea9414e0e58 --- /dev/null +++ b/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,494 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) IBM Corporation, 2009 | ||
18 | * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Thanks to Ingo Molnar for his many suggestions. | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
25 | * using the CPU's debug registers. | ||
26 | * This file contains the arch-independent routines. | ||
27 | */ | ||
28 | |||
29 | #include <linux/irqflags.h> | ||
30 | #include <linux/kallsyms.h> | ||
31 | #include <linux/notifier.h> | ||
32 | #include <linux/kprobes.h> | ||
33 | #include <linux/kdebug.h> | ||
34 | #include <linux/kernel.h> | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/percpu.h> | ||
37 | #include <linux/sched.h> | ||
38 | #include <linux/init.h> | ||
39 | #include <linux/smp.h> | ||
40 | |||
41 | #include <linux/hw_breakpoint.h> | ||
42 | |||
43 | #include <asm/processor.h> | ||
44 | |||
45 | #ifdef CONFIG_X86 | ||
46 | #include <asm/debugreg.h> | ||
47 | #endif | ||
48 | |||
49 | /* | ||
50 | * Constraints data | ||
51 | */ | ||
52 | |||
53 | /* Number of pinned cpu breakpoints in a cpu */ | ||
54 | static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); | ||
55 | |||
56 | /* Number of pinned task breakpoints in a cpu */ | ||
57 | static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); | ||
58 | |||
59 | /* Number of non-pinned cpu/task breakpoints in a cpu */ | ||
60 | static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); | ||
61 | |||
62 | /* Gather the total number of pinned and un-pinned bp in a cpuset */ | ||
63 | struct bp_busy_slots { | ||
64 | unsigned int pinned; | ||
65 | unsigned int flexible; | ||
66 | }; | ||
67 | |||
68 | /* Serialize accesses to the above constraints */ | ||
69 | static DEFINE_MUTEX(nr_bp_mutex); | ||
70 | |||
71 | /* | ||
72 | * Report the maximum number of pinned breakpoints a task | ||
73 | * can have on this cpu | ||
74 | */ | ||
75 | static unsigned int max_task_bp_pinned(int cpu) | ||
76 | { | ||
77 | int i; | ||
78 | unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); | ||
79 | |||
80 | for (i = HBP_NUM - 1; i >= 0; i--) { | ||
81 | if (tsk_pinned[i] > 0) | ||
82 | return i + 1; | ||
83 | } | ||
84 | |||
85 | return 0; | ||
86 | } | ||
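To make the accounting concrete, an illustrative worked example (assuming x86's HBP_NUM == 4; not part of the patch):

	/*
	 * task_bp_pinned[i] counts tasks on this cpu holding exactly i + 1
	 * pinned breakpoints.  With tsk_pinned == { 3, 1, 0, 0 } (three
	 * tasks owning one breakpoint each and one task owning two), the
	 * deepest occupied index is 1, so max_task_bp_pinned() returns 2:
	 * the worst case a single task contributes on this cpu.
	 */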
87 | |||
88 | /* | ||
89 | * Report the number of pinned/un-pinned breakpoints we have in | ||
90 | * a given cpu (cpu > -1) or in all of them (cpu = -1). | ||
91 | */ | ||
92 | static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) | ||
93 | { | ||
94 | if (cpu >= 0) { | ||
95 | slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); | ||
96 | slots->pinned += max_task_bp_pinned(cpu); | ||
97 | slots->flexible = per_cpu(nr_bp_flexible, cpu); | ||
98 | |||
99 | return; | ||
100 | } | ||
101 | |||
102 | for_each_online_cpu(cpu) { | ||
103 | unsigned int nr; | ||
104 | |||
105 | nr = per_cpu(nr_cpu_bp_pinned, cpu); | ||
106 | nr += max_task_bp_pinned(cpu); | ||
107 | |||
108 | if (nr > slots->pinned) | ||
109 | slots->pinned = nr; | ||
110 | |||
111 | nr = per_cpu(nr_bp_flexible, cpu); | ||
112 | |||
113 | if (nr > slots->flexible) | ||
114 | slots->flexible = nr; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * Add/remove a pinned breakpoint for the given task in our constraint table | ||
120 | */ | ||
121 | static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) | ||
122 | { | ||
123 | int count = 0; | ||
124 | struct perf_event *bp; | ||
125 | struct perf_event_context *ctx = tsk->perf_event_ctxp; | ||
126 | unsigned int *task_bp_pinned; | ||
127 | struct list_head *list; | ||
128 | unsigned long flags; | ||
129 | |||
130 | if (WARN_ONCE(!ctx, "No perf context for this task")) | ||
131 | return; | ||
132 | |||
133 | list = &ctx->event_list; | ||
134 | |||
135 | spin_lock_irqsave(&ctx->lock, flags); | ||
136 | |||
137 | /* | ||
138 | * The current breakpoint counter is not yet included in the list | ||
139 | * at open() callback time | ||
140 | */ | ||
141 | list_for_each_entry(bp, list, event_entry) { | ||
142 | if (bp->attr.type == PERF_TYPE_BREAKPOINT) | ||
143 | count++; | ||
144 | } | ||
145 | |||
146 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
147 | |||
148 | if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) | ||
149 | return; | ||
150 | |||
151 | task_bp_pinned = per_cpu(task_bp_pinned, cpu); | ||
152 | if (enable) { | ||
153 | task_bp_pinned[count]++; | ||
154 | if (count > 0) | ||
155 | task_bp_pinned[count-1]--; | ||
156 | } else { | ||
157 | task_bp_pinned[count]--; | ||
158 | if (count > 0) | ||
159 | task_bp_pinned[count-1]++; | ||
160 | } | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Add/remove the given breakpoint in our constraint table | ||
165 | */ | ||
166 | static void toggle_bp_slot(struct perf_event *bp, bool enable) | ||
167 | { | ||
168 | int cpu = bp->cpu; | ||
169 | struct task_struct *tsk = bp->ctx->task; | ||
170 | |||
171 | /* Pinned counter task profiling */ | ||
172 | if (tsk) { | ||
173 | if (cpu >= 0) { | ||
174 | toggle_bp_task_slot(tsk, cpu, enable); | ||
175 | return; | ||
176 | } | ||
177 | |||
178 | for_each_online_cpu(cpu) | ||
179 | toggle_bp_task_slot(tsk, cpu, enable); | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | /* Pinned counter cpu profiling */ | ||
184 | if (enable) | ||
185 | per_cpu(nr_cpu_bp_pinned, bp->cpu)++; | ||
186 | else | ||
187 | per_cpu(nr_cpu_bp_pinned, bp->cpu)--; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Constraints to check before allowing this new breakpoint counter: | ||
192 | * | ||
193 | * == Non-pinned counter == (Considered as pinned for now) | ||
194 | * | ||
195 | * - If attached to a single cpu, check: | ||
196 | * | ||
197 | * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) | ||
198 | * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM | ||
199 | * | ||
200 | * -> If there are already non-pinned counters on this cpu, it means | ||
201 | * there is already a free slot for them. | ||
202 | * Otherwise, we check that the maximum number of per task | ||
203 | * breakpoints (for this cpu) plus the number of per cpu breakpoints | ||
204 | * (for this cpu) doesn't cover all registers. | ||
205 | * | ||
206 | * - If attached to every cpu, check: | ||
207 | * | ||
208 | * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) | ||
209 | * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM | ||
210 | * | ||
211 | * -> This is roughly the same, except we check the number of per cpu | ||
212 | * bp for every cpu and we keep the max one. Same for the per task | ||
213 | * breakpoints. | ||
214 | * | ||
215 | * | ||
216 | * == Pinned counter == | ||
217 | * | ||
218 | * - If attached to a single cpu, check: | ||
219 | * | ||
220 | * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) | ||
221 | * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM | ||
222 | * | ||
223 | * -> Same checks as before. But now the nr_bp_flexible, if any, must keep | ||
224 | * at least one register (or they will never be scheduled). | ||
225 | * | ||
226 | * - If attached to every cpu, check: | ||
227 | * | ||
228 | * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) | ||
229 | * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM | ||
230 | */ | ||
231 | int reserve_bp_slot(struct perf_event *bp) | ||
232 | { | ||
233 | struct bp_busy_slots slots = {0}; | ||
234 | int ret = 0; | ||
235 | |||
236 | mutex_lock(&nr_bp_mutex); | ||
237 | |||
238 | fetch_bp_busy_slots(&slots, bp->cpu); | ||
239 | |||
240 | /* Flexible counters need to keep at least one slot */ | ||
241 | if (slots.pinned + (!!slots.flexible) == HBP_NUM) { | ||
242 | ret = -ENOSPC; | ||
243 | goto end; | ||
244 | } | ||
245 | |||
246 | toggle_bp_slot(bp, true); | ||
247 | |||
248 | end: | ||
249 | mutex_unlock(&nr_bp_mutex); | ||
250 | |||
251 | return ret; | ||
252 | } | ||
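Plugging numbers into the check above (a sketch assuming HBP_NUM == 4, as on x86):

	/*
	 * Say a cpu carries two pinned cpu-wide breakpoints and its busiest
	 * task pins one more: slots.pinned == 3.  If any flexible counter
	 * exists, slots.pinned + !!slots.flexible == 4 == HBP_NUM, so
	 * reserve_bp_slot() returns -ENOSPC: the last debug register stays
	 * reserved so flexible counters can still be scheduled.  Without a
	 * flexible counter, 3 + 0 < 4 and the pinned slot is granted.
	 */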
253 | |||
254 | void release_bp_slot(struct perf_event *bp) | ||
255 | { | ||
256 | mutex_lock(&nr_bp_mutex); | ||
257 | |||
258 | toggle_bp_slot(bp, false); | ||
259 | |||
260 | mutex_unlock(&nr_bp_mutex); | ||
261 | } | ||
262 | |||
263 | |||
264 | int __register_perf_hw_breakpoint(struct perf_event *bp) | ||
265 | { | ||
266 | int ret; | ||
267 | |||
268 | ret = reserve_bp_slot(bp); | ||
269 | if (ret) | ||
270 | return ret; | ||
271 | |||
272 | if (!bp->attr.disabled) | ||
273 | ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); | ||
274 | |||
275 | return ret; | ||
276 | } | ||
277 | |||
278 | int register_perf_hw_breakpoint(struct perf_event *bp) | ||
279 | { | ||
280 | bp->callback = perf_bp_event; | ||
281 | |||
282 | return __register_perf_hw_breakpoint(bp); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Register a breakpoint bound to a task and a given cpu. | ||
287 | * If cpu is -1, the breakpoint is active for the task on every cpu. | ||
288 | * If pid is -1, the breakpoint is active for every task on the given | ||
289 | * cpu. | ||
290 | */ | ||
291 | static struct perf_event * | ||
292 | register_user_hw_breakpoint_cpu(unsigned long addr, | ||
293 | int len, | ||
294 | int type, | ||
295 | perf_callback_t triggered, | ||
296 | pid_t pid, | ||
297 | int cpu, | ||
298 | bool active) | ||
299 | { | ||
300 | struct perf_event_attr *attr; | ||
301 | struct perf_event *bp; | ||
302 | |||
303 | attr = kzalloc(sizeof(*attr), GFP_KERNEL); | ||
304 | if (!attr) | ||
305 | return ERR_PTR(-ENOMEM); | ||
306 | |||
307 | attr->type = PERF_TYPE_BREAKPOINT; | ||
308 | attr->size = sizeof(*attr); | ||
309 | attr->bp_addr = addr; | ||
310 | attr->bp_len = len; | ||
311 | attr->bp_type = type; | ||
312 | /* | ||
313 | * Such breakpoints are used by debuggers to trigger signals when | ||
314 | * we hit the expected memory op. We can't miss such events, they | ||
315 | * must be pinned. | ||
316 | */ | ||
317 | attr->pinned = 1; | ||
318 | |||
319 | if (!active) | ||
320 | attr->disabled = 1; | ||
321 | |||
322 | bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); | ||
323 | kfree(attr); | ||
324 | |||
325 | return bp; | ||
326 | } | ||
327 | |||
328 | /** | ||
329 | * register_user_hw_breakpoint - register a hardware breakpoint for user space | ||
330 | * @addr: is the memory address that triggers the breakpoint | ||
331 | * @len: the length of the access to the memory (1, 2, 4 or 8 bytes) | ||
332 | * @type: the type of the access to the memory (read/write/exec) | ||
333 | * @triggered: callback to trigger when we hit the breakpoint | ||
334 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
335 | * @active: should we activate it while registering it | ||
336 | * | ||
337 | */ | ||
338 | struct perf_event * | ||
339 | register_user_hw_breakpoint(unsigned long addr, | ||
340 | int len, | ||
341 | int type, | ||
342 | perf_callback_t triggered, | ||
343 | struct task_struct *tsk, | ||
344 | bool active) | ||
345 | { | ||
346 | return register_user_hw_breakpoint_cpu(addr, len, type, triggered, | ||
347 | tsk->pid, -1, active); | ||
348 | } | ||
349 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | ||
350 | |||
351 | /** | ||
352 | * modify_user_hw_breakpoint - modify a user-space hardware breakpoint | ||
353 | * @bp: the breakpoint structure to modify | ||
354 | * @addr: is the memory address that triggers the breakpoint | ||
355 | * @len: the length of the access to the memory (1, 2, 4 or 8 bytes) | ||
356 | * @type: the type of the access to the memory (read/write/exec) | ||
357 | * @triggered: callback to trigger when we hit the breakpoint | ||
358 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
359 | * @active: should we activate it while registering it | ||
360 | */ | ||
361 | struct perf_event * | ||
362 | modify_user_hw_breakpoint(struct perf_event *bp, | ||
363 | unsigned long addr, | ||
364 | int len, | ||
365 | int type, | ||
366 | perf_callback_t triggered, | ||
367 | struct task_struct *tsk, | ||
368 | bool active) | ||
369 | { | ||
370 | /* | ||
371 | * FIXME: do it without unregistering | ||
372 | * - We don't want to lose our slot | ||
373 | * - If the new bp is incorrect, don't lose the older one | ||
374 | */ | ||
375 | unregister_hw_breakpoint(bp); | ||
376 | |||
377 | return register_user_hw_breakpoint(addr, len, type, triggered, | ||
378 | tsk, active); | ||
379 | } | ||
380 | EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); | ||
381 | |||
382 | /** | ||
383 | * unregister_hw_breakpoint - unregister a user-space hardware breakpoint | ||
384 | * @bp: the breakpoint structure to unregister | ||
385 | */ | ||
386 | void unregister_hw_breakpoint(struct perf_event *bp) | ||
387 | { | ||
388 | if (!bp) | ||
389 | return; | ||
390 | perf_event_release_kernel(bp); | ||
391 | } | ||
392 | EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); | ||
393 | |||
394 | static struct perf_event * | ||
395 | register_kernel_hw_breakpoint_cpu(unsigned long addr, | ||
396 | int len, | ||
397 | int type, | ||
398 | perf_callback_t triggered, | ||
399 | int cpu, | ||
400 | bool active) | ||
401 | { | ||
402 | return register_user_hw_breakpoint_cpu(addr, len, type, triggered, | ||
403 | -1, cpu, active); | ||
404 | } | ||
405 | |||
406 | /** | ||
407 | * register_wide_hw_breakpoint - register a wide breakpoint in the kernel | ||
408 | * @addr: is the memory address that triggers the breakpoint | ||
409 | * @len: the length of the access to the memory (1, 2, 4 or 8 bytes) | ||
410 | * @type: the type of the access to the memory (read/write/exec) | ||
411 | * @triggered: callback to trigger when we hit the breakpoint | ||
412 | * @active: should we activate it while registering it | ||
413 | * | ||
414 | * @return a set of per_cpu pointers to perf events | ||
415 | */ | ||
416 | struct perf_event ** | ||
417 | register_wide_hw_breakpoint(unsigned long addr, | ||
418 | int len, | ||
419 | int type, | ||
420 | perf_callback_t triggered, | ||
421 | bool active) | ||
422 | { | ||
423 | struct perf_event **cpu_events, **pevent, *bp; | ||
424 | long err; | ||
425 | int cpu; | ||
426 | |||
427 | cpu_events = alloc_percpu(typeof(*cpu_events)); | ||
428 | if (!cpu_events) | ||
429 | return ERR_PTR(-ENOMEM); | ||
430 | |||
431 | for_each_possible_cpu(cpu) { | ||
432 | pevent = per_cpu_ptr(cpu_events, cpu); | ||
433 | bp = register_kernel_hw_breakpoint_cpu(addr, len, type, | ||
434 | triggered, cpu, active); | ||
435 | |||
436 | *pevent = bp; | ||
437 | |||
438 | if (IS_ERR(bp) || !bp) { | ||
439 | err = PTR_ERR(bp); | ||
440 | goto fail; | ||
441 | } | ||
442 | } | ||
443 | |||
444 | return cpu_events; | ||
445 | |||
446 | fail: | ||
447 | for_each_possible_cpu(cpu) { | ||
448 | pevent = per_cpu_ptr(cpu_events, cpu); | ||
449 | if (IS_ERR(*pevent) || !*pevent) | ||
450 | break; | ||
451 | unregister_hw_breakpoint(*pevent); | ||
452 | } | ||
453 | free_percpu(cpu_events); | ||
454 | /* return the error if any */ | ||
455 | return ERR_PTR(err); | ||
456 | } | ||
457 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); | ||
458 | |||
459 | /** | ||
460 | * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel | ||
461 | * @cpu_events: the per cpu set of events to unregister | ||
462 | */ | ||
463 | void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) | ||
464 | { | ||
465 | int cpu; | ||
466 | struct perf_event **pevent; | ||
467 | |||
468 | for_each_possible_cpu(cpu) { | ||
469 | pevent = per_cpu_ptr(cpu_events, cpu); | ||
470 | unregister_hw_breakpoint(*pevent); | ||
471 | } | ||
472 | free_percpu(cpu_events); | ||
473 | } | ||
474 | EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); | ||
475 | |||
476 | static struct notifier_block hw_breakpoint_exceptions_nb = { | ||
477 | .notifier_call = hw_breakpoint_exceptions_notify, | ||
478 | /* we need to be notified first */ | ||
479 | .priority = 0x7fffffff | ||
480 | }; | ||
481 | |||
482 | static int __init init_hw_breakpoint(void) | ||
483 | { | ||
484 | return register_die_notifier(&hw_breakpoint_exceptions_nb); | ||
485 | } | ||
486 | core_initcall(init_hw_breakpoint); | ||
487 | |||
488 | |||
489 | struct pmu perf_ops_bp = { | ||
490 | .enable = arch_install_hw_breakpoint, | ||
491 | .disable = arch_uninstall_hw_breakpoint, | ||
492 | .read = hw_breakpoint_pmu_read, | ||
493 | .unthrottle = hw_breakpoint_pmu_unthrottle | ||
494 | }; | ||
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8b6b8b697c68..8e5288a8a355 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name) | |||
181 | } | 181 | } |
182 | return module_kallsyms_lookup_name(name); | 182 | return module_kallsyms_lookup_name(name); |
183 | } | 183 | } |
184 | EXPORT_SYMBOL_GPL(kallsyms_lookup_name); | ||
184 | 185 | ||
185 | int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, | 186 | int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, |
186 | unsigned long), | 187 | unsigned long), |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3256e36ad251..3852e2656bb0 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/kernel_stat.h> | 29 | #include <linux/kernel_stat.h> |
30 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | #include <linux/ftrace_event.h> | 31 | #include <linux/ftrace_event.h> |
32 | #include <linux/hw_breakpoint.h> | ||
32 | 33 | ||
33 | #include <asm/irq_regs.h> | 34 | #include <asm/irq_regs.h> |
34 | 35 | ||
@@ -1725,6 +1726,26 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1725 | return 0; | 1726 | return 0; |
1726 | } | 1727 | } |
1727 | 1728 | ||
1729 | int perf_event_release_kernel(struct perf_event *event) | ||
1730 | { | ||
1731 | struct perf_event_context *ctx = event->ctx; | ||
1732 | |||
1733 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1734 | mutex_lock(&ctx->mutex); | ||
1735 | perf_event_remove_from_context(event); | ||
1736 | mutex_unlock(&ctx->mutex); | ||
1737 | |||
1738 | mutex_lock(&event->owner->perf_event_mutex); | ||
1739 | list_del_init(&event->owner_entry); | ||
1740 | mutex_unlock(&event->owner->perf_event_mutex); | ||
1741 | put_task_struct(event->owner); | ||
1742 | |||
1743 | free_event(event); | ||
1744 | |||
1745 | return 0; | ||
1746 | } | ||
1747 | EXPORT_SYMBOL_GPL(perf_event_release_kernel); | ||
1748 | |||
1728 | static int perf_event_read_size(struct perf_event *event) | 1749 | static int perf_event_read_size(struct perf_event *event) |
1729 | { | 1750 | { |
1730 | int entry = sizeof(u64); /* value */ | 1751 | int entry = sizeof(u64); /* value */ |
@@ -1750,7 +1771,7 @@ static int perf_event_read_size(struct perf_event *event) | |||
1750 | return size; | 1771 | return size; |
1751 | } | 1772 | } |
1752 | 1773 | ||
1753 | static u64 perf_event_read_value(struct perf_event *event) | 1774 | u64 perf_event_read_value(struct perf_event *event) |
1754 | { | 1775 | { |
1755 | struct perf_event *child; | 1776 | struct perf_event *child; |
1756 | u64 total = 0; | 1777 | u64 total = 0; |
@@ -1761,6 +1782,7 @@ static u64 perf_event_read_value(struct perf_event *event) | |||
1761 | 1782 | ||
1762 | return total; | 1783 | return total; |
1763 | } | 1784 | } |
1785 | EXPORT_SYMBOL_GPL(perf_event_read_value); | ||
1764 | 1786 | ||
1765 | static int perf_event_read_entry(struct perf_event *event, | 1787 | static int perf_event_read_entry(struct perf_event *event, |
1766 | u64 read_format, char __user *buf) | 1788 | u64 read_format, char __user *buf) |
@@ -4231,6 +4253,51 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4231 | 4253 | ||
4232 | #endif /* CONFIG_EVENT_PROFILE */ | 4254 | #endif /* CONFIG_EVENT_PROFILE */ |
4233 | 4255 | ||
4256 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
4257 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4258 | { | ||
4259 | release_bp_slot(event); | ||
4260 | } | ||
4261 | |||
4262 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4263 | { | ||
4264 | int err; | ||
4265 | /* | ||
4266 | * The breakpoint is already filled if we haven't created the counter | ||
4267 | * through perf syscall | ||
4268 | * FIXME: manage to get trigerred to NULL if it comes from syscalls | ||
4269 | */ | ||
4270 | if (!bp->callback) | ||
4271 | err = register_perf_hw_breakpoint(bp); | ||
4272 | else | ||
4273 | err = __register_perf_hw_breakpoint(bp); | ||
4274 | if (err) | ||
4275 | return ERR_PTR(err); | ||
4276 | |||
4277 | bp->destroy = bp_perf_event_destroy; | ||
4278 | |||
4279 | return &perf_ops_bp; | ||
4280 | } | ||
4281 | |||
4282 | void perf_bp_event(struct perf_event *bp, void *regs) | ||
4283 | { | ||
4284 | /* TODO */ | ||
4285 | } | ||
4286 | #else | ||
4287 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4288 | { | ||
4289 | } | ||
4290 | |||
4291 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4292 | { | ||
4293 | return NULL; | ||
4294 | } | ||
4295 | |||
4296 | void perf_bp_event(struct perf_event *bp, void *regs) | ||
4297 | { | ||
4298 | } | ||
4299 | #endif | ||
4300 | |||
4234 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4301 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
4235 | 4302 | ||
4236 | static void sw_perf_event_destroy(struct perf_event *event) | 4303 | static void sw_perf_event_destroy(struct perf_event *event) |
@@ -4297,6 +4364,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4297 | struct perf_event_context *ctx, | 4364 | struct perf_event_context *ctx, |
4298 | struct perf_event *group_leader, | 4365 | struct perf_event *group_leader, |
4299 | struct perf_event *parent_event, | 4366 | struct perf_event *parent_event, |
4367 | perf_callback_t callback, | ||
4300 | gfp_t gfpflags) | 4368 | gfp_t gfpflags) |
4301 | { | 4369 | { |
4302 | const struct pmu *pmu; | 4370 | const struct pmu *pmu; |
@@ -4339,6 +4407,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4339 | 4407 | ||
4340 | event->state = PERF_EVENT_STATE_INACTIVE; | 4408 | event->state = PERF_EVENT_STATE_INACTIVE; |
4341 | 4409 | ||
4410 | if (!callback && parent_event) | ||
4411 | callback = parent_event->callback; | ||
4412 | |||
4413 | event->callback = callback; | ||
4414 | |||
4342 | if (attr->disabled) | 4415 | if (attr->disabled) |
4343 | event->state = PERF_EVENT_STATE_OFF; | 4416 | event->state = PERF_EVENT_STATE_OFF; |
4344 | 4417 | ||
@@ -4373,6 +4446,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4373 | pmu = tp_perf_event_init(event); | 4446 | pmu = tp_perf_event_init(event); |
4374 | break; | 4447 | break; |
4375 | 4448 | ||
4449 | case PERF_TYPE_BREAKPOINT: | ||
4450 | pmu = bp_perf_event_init(event); | ||
4451 | break; | ||
4452 | |||
4453 | |||
4376 | default: | 4454 | default: |
4377 | break; | 4455 | break; |
4378 | } | 4456 | } |
@@ -4615,7 +4693,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
4615 | } | 4693 | } |
4616 | 4694 | ||
4617 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | 4695 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
4618 | NULL, GFP_KERNEL); | 4696 | NULL, NULL, GFP_KERNEL); |
4619 | err = PTR_ERR(event); | 4697 | err = PTR_ERR(event); |
4620 | if (IS_ERR(event)) | 4698 | if (IS_ERR(event)) |
4621 | goto err_put_context; | 4699 | goto err_put_context; |
@@ -4663,6 +4741,58 @@ err_put_context: | |||
4663 | return err; | 4741 | return err; |
4664 | } | 4742 | } |
4665 | 4743 | ||
4744 | /** | ||
4745 | * perf_event_create_kernel_counter | ||
4746 | * | ||
4747 | * @attr: attributes of the counter to create | ||
4748 | * @cpu: cpu in which the counter is bound | ||
4749 | * @pid: task to profile | ||
4750 | */ | ||
4751 | struct perf_event * | ||
4752 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | ||
4753 | pid_t pid, perf_callback_t callback) | ||
4754 | { | ||
4755 | struct perf_event *event; | ||
4756 | struct perf_event_context *ctx; | ||
4757 | int err; | ||
4758 | |||
4759 | /* | ||
4760 | * Get the target context (task or percpu): | ||
4761 | */ | ||
4762 | |||
4763 | ctx = find_get_context(pid, cpu); | ||
4764 | if (IS_ERR(ctx)) | ||
4765 | return NULL; | ||
4766 | |||
4767 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
4768 | NULL, callback, GFP_KERNEL); | ||
4769 | err = PTR_ERR(event); | ||
4770 | if (IS_ERR(event)) | ||
4771 | goto err_put_context; | ||
4772 | |||
4773 | event->filp = NULL; | ||
4774 | WARN_ON_ONCE(ctx->parent_ctx); | ||
4775 | mutex_lock(&ctx->mutex); | ||
4776 | perf_install_in_context(ctx, event, cpu); | ||
4777 | ++ctx->generation; | ||
4778 | mutex_unlock(&ctx->mutex); | ||
4779 | |||
4780 | event->owner = current; | ||
4781 | get_task_struct(current); | ||
4782 | mutex_lock(¤t->perf_event_mutex); | ||
4783 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
4784 | mutex_unlock(¤t->perf_event_mutex); | ||
4785 | |||
4786 | return event; | ||
4787 | |||
4788 | err_put_context: | ||
4789 | if (err < 0) | ||
4790 | put_ctx(ctx); | ||
4791 | |||
4792 | return NULL; | ||
4793 | } | ||
4794 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | ||
4795 | |||
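In-kernel users can now create a counter without a file descriptor. A minimal sketch (attr values are illustrative; note that this version returns NULL on failure rather than an ERR_PTR):

	#include <linux/perf_event.h>

	static void my_callback(struct perf_event *event, void *data)
	{
		/* invoked through the counter's callback path */
	}

	static struct perf_event *create_cycles_counter(int cpu)
	{
		struct perf_event_attr attr = {
			.type	= PERF_TYPE_HARDWARE,
			.config	= PERF_COUNT_HW_CPU_CYCLES,
			.size	= sizeof(attr),
			.pinned	= 1,
		};

		/* pid == -1: count on the given cpu rather than one task */
		return perf_event_create_kernel_counter(&attr, cpu, -1,
							my_callback);
	}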
4666 | /* | 4796 | /* |
4667 | * inherit a event from parent task to child task: | 4797 | * inherit a event from parent task to child task: |
4668 | */ | 4798 | */ |
@@ -4688,7 +4818,7 @@ inherit_event(struct perf_event *parent_event, | |||
4688 | child_event = perf_event_alloc(&parent_event->attr, | 4818 | child_event = perf_event_alloc(&parent_event->attr, |
4689 | parent_event->cpu, child_ctx, | 4819 | parent_event->cpu, child_ctx, |
4690 | group_leader, parent_event, | 4820 | group_leader, parent_event, |
4691 | GFP_KERNEL); | 4821 | NULL, GFP_KERNEL); |
4692 | if (IS_ERR(child_event)) | 4822 | if (IS_ERR(child_event)) |
4693 | return child_event; | 4823 | return child_event; |
4694 | get_ctx(child_ctx); | 4824 | get_ctx(child_ctx); |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f05671609a89..d006554888dc 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -339,6 +339,27 @@ config POWER_TRACER | |||
339 | power management decisions, specifically the C-state and P-state | 339 | power management decisions, specifically the C-state and P-state |
340 | behavior. | 340 | behavior. |
341 | 341 | ||
342 | config KSYM_TRACER | ||
343 | bool "Trace read and write access on kernel memory locations" | ||
344 | depends on HAVE_HW_BREAKPOINT | ||
345 | select TRACING | ||
346 | help | ||
347 | This tracer helps find read and write operations on any given kernel | ||
348 | symbol i.e. /proc/kallsyms. | ||
349 | |||
350 | config PROFILE_KSYM_TRACER | ||
351 | bool "Profile all kernel memory accesses on 'watched' variables" | ||
352 | depends on KSYM_TRACER | ||
353 | help | ||
354 | This tracer profiles kernel accesses on variables watched through the | ||
355 | ksym tracer ftrace plugin. Depending upon the hardware, all read | ||
356 | and write operations on kernel variables can be monitored for | ||
357 | accesses. | ||
358 | |||
359 | The results will be displayed in: | ||
360 | /debugfs/tracing/profile_ksym | ||
361 | |||
362 | Say N if unsure. | ||
342 | 363 | ||
343 | config STACK_TRACER | 364 | config STACK_TRACER |
344 | bool "Trace max stack" | 365 | bool "Trace max stack" |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index edc3a3cca1a1..cd9ecd89ec77 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | |||
54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o |
55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
57 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o | ||
57 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o | 58 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o |
58 | 59 | ||
59 | libftrace-y := ftrace.o | 60 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b4e4212e66d7..4da6ede74401 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <trace/boot.h> | 12 | #include <trace/boot.h> |
13 | #include <linux/kmemtrace.h> | 13 | #include <linux/kmemtrace.h> |
14 | #include <linux/hw_breakpoint.h> | ||
14 | 15 | ||
15 | #include <linux/trace_seq.h> | 16 | #include <linux/trace_seq.h> |
16 | #include <linux/ftrace_event.h> | 17 | #include <linux/ftrace_event.h> |
@@ -37,6 +38,7 @@ enum trace_type { | |||
37 | TRACE_KMEM_ALLOC, | 38 | TRACE_KMEM_ALLOC, |
38 | TRACE_KMEM_FREE, | 39 | TRACE_KMEM_FREE, |
39 | TRACE_BLK, | 40 | TRACE_BLK, |
41 | TRACE_KSYM, | ||
40 | 42 | ||
41 | __TRACE_LAST_TYPE, | 43 | __TRACE_LAST_TYPE, |
42 | }; | 44 | }; |
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void); | |||
232 | TRACE_KMEM_ALLOC); \ | 234 | TRACE_KMEM_ALLOC); \ |
233 | IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ | 235 | IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ |
234 | TRACE_KMEM_FREE); \ | 236 | TRACE_KMEM_FREE); \ |
237 | IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ | ||
235 | __ftrace_bad_type(); \ | 238 | __ftrace_bad_type(); \ |
236 | } while (0) | 239 | } while (0) |
237 | 240 | ||
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type); | |||
387 | void unregister_tracer(struct tracer *type); | 390 | void unregister_tracer(struct tracer *type); |
388 | int is_tracing_stopped(void); | 391 | int is_tracing_stopped(void); |
389 | 392 | ||
393 | extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); | ||
394 | |||
390 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); | 395 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); |
391 | 396 | ||
392 | #ifdef CONFIG_TRACER_MAX_TRACE | 397 | #ifdef CONFIG_TRACER_MAX_TRACE |
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, | |||
461 | struct trace_array *tr); | 466 | struct trace_array *tr); |
462 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, | 467 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, |
463 | struct trace_array *tr); | 468 | struct trace_array *tr); |
469 | extern int trace_selftest_startup_ksym(struct tracer *trace, | ||
470 | struct trace_array *tr); | ||
464 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 471 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
465 | 472 | ||
466 | extern void *head_page(struct trace_array_cpu *data); | 473 | extern void *head_page(struct trace_array_cpu *data); |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index ead3d724599d..c16a08f399df 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, | |||
364 | F_printk("type:%u call_site:%lx ptr:%p", | 364 | F_printk("type:%u call_site:%lx ptr:%p", |
365 | __entry->type_id, __entry->call_site, __entry->ptr) | 365 | __entry->type_id, __entry->call_site, __entry->ptr) |
366 | ); | 366 | ); |
367 | |||
368 | FTRACE_ENTRY(ksym_trace, ksym_trace_entry, | ||
369 | |||
370 | TRACE_KSYM, | ||
371 | |||
372 | F_STRUCT( | ||
373 | __field( unsigned long, ip ) | ||
374 | __field( unsigned char, type ) | ||
375 | __array( char , cmd, TASK_COMM_LEN ) | ||
376 | __field( unsigned long, addr ) | ||
377 | ), | ||
378 | |||
379 | F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", | ||
380 | (void *)__entry->ip, (unsigned int)__entry->type, | ||
381 | (void *)__entry->addr, __entry->cmd) | ||
382 | ); | ||
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c new file mode 100644 index 000000000000..11935b53a6cb --- /dev/null +++ b/kernel/trace/trace_ksym.c | |||
@@ -0,0 +1,554 @@ | |||
1 | /* | ||
2 | * trace_ksym.c - Kernel Symbol Tracer | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/kallsyms.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/debugfs.h> | ||
24 | #include <linux/ftrace.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #include "trace_output.h" | ||
29 | #include "trace_stat.h" | ||
30 | #include "trace.h" | ||
31 | |||
32 | #include <linux/hw_breakpoint.h> | ||
33 | #include <asm/hw_breakpoint.h> | ||
34 | |||
35 | /* | ||
36 | * For now, let us restrict the no. of symbols traced simultaneously to the | ||
37 | * number of available hardware breakpoint registers. | ||
38 | */ | ||
39 | #define KSYM_TRACER_MAX HBP_NUM | ||
40 | |||
41 | #define KSYM_TRACER_OP_LEN 3 /* rw- */ | ||
42 | |||
43 | struct trace_ksym { | ||
44 | struct perf_event **ksym_hbp; | ||
45 | unsigned long ksym_addr; | ||
46 | int type; | ||
47 | int len; | ||
48 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
49 | unsigned long counter; | ||
50 | #endif | ||
51 | struct hlist_node ksym_hlist; | ||
52 | }; | ||
53 | |||
54 | static struct trace_array *ksym_trace_array; | ||
55 | |||
56 | static unsigned int ksym_filter_entry_count; | ||
57 | static unsigned int ksym_tracing_enabled; | ||
58 | |||
59 | static HLIST_HEAD(ksym_filter_head); | ||
60 | |||
61 | static DEFINE_MUTEX(ksym_tracer_mutex); | ||
62 | |||
63 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
64 | |||
65 | #define MAX_UL_INT 0xffffffff | ||
66 | |||
67 | void ksym_collect_stats(unsigned long hbp_hit_addr) | ||
68 | { | ||
69 | struct hlist_node *node; | ||
70 | struct trace_ksym *entry; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { | ||
74 | if ((entry->ksym_addr == hbp_hit_addr) && | ||
75 | (entry->counter <= MAX_UL_INT)) { | ||
76 | entry->counter++; | ||
77 | break; | ||
78 | } | ||
79 | } | ||
80 | rcu_read_unlock(); | ||
81 | } | ||
82 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
83 | |||
84 | void ksym_hbp_handler(struct perf_event *hbp, void *data) | ||
85 | { | ||
86 | struct ring_buffer_event *event; | ||
87 | struct ksym_trace_entry *entry; | ||
88 | struct pt_regs *regs = data; | ||
89 | struct ring_buffer *buffer; | ||
90 | int pc; | ||
91 | |||
92 | if (!ksym_tracing_enabled) | ||
93 | return; | ||
94 | |||
95 | buffer = ksym_trace_array->buffer; | ||
96 | |||
97 | pc = preempt_count(); | ||
98 | |||
99 | event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, | ||
100 | sizeof(*entry), 0, pc); | ||
101 | if (!event) | ||
102 | return; | ||
103 | |||
104 | entry = ring_buffer_event_data(event); | ||
105 | entry->ip = instruction_pointer(regs); | ||
106 | entry->type = hw_breakpoint_type(hbp); | ||
107 | entry->addr = hw_breakpoint_addr(hbp); | ||
108 | strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); | ||
109 | |||
110 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
111 | ksym_collect_stats(hw_breakpoint_addr(hbp)); | ||
112 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
113 | |||
114 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Valid access types are represented as: | ||
119 | * | ||
120 | * rw- : Set a read/write access breakpoint | ||
121 | * -w- : Set a write access breakpoint | ||
122 | * --- : Clear the breakpoints | ||
123 | * --x : Set an execution breakpoint (not available yet) | ||
124 | */ | ||
125 | static int ksym_trace_get_access_type(char *str) | ||
126 | { | ||
127 | int access = 0; | ||
128 | |||
129 | if (str[0] == 'r') | ||
130 | access |= HW_BREAKPOINT_R; | ||
131 | |||
132 | if (str[1] == 'w') | ||
133 | access |= HW_BREAKPOINT_W; | ||
134 | |||
135 | if (str[2] == 'x') | ||
136 | access |= HW_BREAKPOINT_X; | ||
137 | |||
138 | switch (access) { | ||
139 | case HW_BREAKPOINT_R: | ||
140 | case HW_BREAKPOINT_W: | ||
141 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
142 | return access; | ||
143 | default: | ||
144 | return -EINVAL; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * There are several possible malformed requests and we attempt to catch | ||
150 | * all of them. Briefly, the rules are: | ||
151 | * 1. Kernel symbol names containing ':' are not allowed, since ':' is the | ||
152 | * delimiter; i.e. multiple ':' characters are disallowed. Such input | ||
153 | * would take the form <module>:<ksym_name>:<op>. | ||
154 | * 2. No delimiter ':' present in the input string at all | ||
155 | * 3. Spurious operator characters, or characters out of position | ||
156 | * 4. <ksym_name>:--- i.e. a clear request for a ksym_name not in the file | ||
157 | * 5. Kernel symbol not present in /proc/kallsyms | ||
158 | * 6. Duplicate requests | ||
159 | */ | ||
160 | static int parse_ksym_trace_str(char *input_string, char **ksymname, | ||
161 | unsigned long *addr) | ||
162 | { | ||
163 | int ret; | ||
164 | |||
165 | *ksymname = strsep(&input_string, ":"); | ||
166 | *addr = kallsyms_lookup_name(*ksymname); | ||
167 | |||
168 | /* Check for malformed request: (2), (1) and (5) */ | ||
169 | if ((!input_string) || | ||
170 | (strlen(input_string) != KSYM_TRACER_OP_LEN) || | ||
171 | (*addr == 0)) | ||
172 | return -EINVAL; | ||
173 | |||
174 | ret = ksym_trace_get_access_type(input_string); | ||
175 | |||
176 | return ret; | ||
177 | } | ||
178 | |||
179 | int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) | ||
180 | { | ||
181 | struct trace_ksym *entry; | ||
182 | int ret = -ENOMEM; | ||
183 | |||
184 | if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { | ||
185 | printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached, no" | ||
186 | " new requests for tracing can be accepted now.\n", | ||
187 | KSYM_TRACER_MAX); | ||
188 | return -ENOSPC; | ||
189 | } | ||
190 | |||
191 | entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); | ||
192 | if (!entry) | ||
193 | return -ENOMEM; | ||
194 | |||
195 | entry->type = op; | ||
196 | entry->ksym_addr = addr; | ||
197 | entry->len = HW_BREAKPOINT_LEN_4; | ||
198 | |||
199 | ret = -EAGAIN; | ||
200 | entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, | ||
201 | entry->len, entry->type, | ||
202 | ksym_hbp_handler, true); | ||
203 | if (IS_ERR(entry->ksym_hbp)) { | ||
204 | ret = PTR_ERR(entry->ksym_hbp); | ||
205 | entry->ksym_hbp = NULL; | ||
206 | } | ||
207 | |||
208 | if (!entry->ksym_hbp) { | ||
209 | printk(KERN_INFO "ksym_tracer request failed. Try again" | ||
210 | " later!\n"); | ||
211 | goto err; | ||
212 | } | ||
213 | |||
214 | hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); | ||
215 | ksym_filter_entry_count++; | ||
216 | |||
217 | return 0; | ||
218 | |||
219 | err: | ||
220 | kfree(entry); | ||
221 | |||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, | ||
226 | size_t count, loff_t *ppos) | ||
227 | { | ||
228 | struct trace_ksym *entry; | ||
229 | struct hlist_node *node; | ||
230 | struct trace_seq *s; | ||
231 | ssize_t cnt = 0; | ||
232 | int ret; | ||
233 | |||
234 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
235 | if (!s) | ||
236 | return -ENOMEM; | ||
237 | trace_seq_init(s); | ||
238 | |||
239 | mutex_lock(&ksym_tracer_mutex); | ||
240 | |||
241 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | ||
242 | ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); | ||
243 | if (entry->type == HW_BREAKPOINT_R) | ||
244 | ret = trace_seq_puts(s, "r--\n"); | ||
245 | else if (entry->type == HW_BREAKPOINT_W) | ||
246 | ret = trace_seq_puts(s, "-w-\n"); | ||
247 | else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) | ||
248 | ret = trace_seq_puts(s, "rw-\n"); | ||
249 | WARN_ON_ONCE(!ret); | ||
250 | } | ||
251 | |||
252 | cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | ||
253 | |||
254 | mutex_unlock(&ksym_tracer_mutex); | ||
255 | |||
256 | kfree(s); | ||
257 | |||
258 | return cnt; | ||
259 | } | ||
260 | |||
261 | static void __ksym_trace_reset(void) | ||
262 | { | ||
263 | struct trace_ksym *entry; | ||
264 | struct hlist_node *node, *node1; | ||
265 | |||
266 | mutex_lock(&ksym_tracer_mutex); | ||
267 | hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, | ||
268 | ksym_hlist) { | ||
269 | unregister_wide_hw_breakpoint(entry->ksym_hbp); | ||
270 | ksym_filter_entry_count--; | ||
271 | hlist_del_rcu(&(entry->ksym_hlist)); | ||
272 | synchronize_rcu(); | ||
273 | kfree(entry); | ||
274 | } | ||
275 | mutex_unlock(&ksym_tracer_mutex); | ||
276 | } | ||
277 | |||
278 | static ssize_t ksym_trace_filter_write(struct file *file, | ||
279 | const char __user *buffer, | ||
280 | size_t count, loff_t *ppos) | ||
281 | { | ||
282 | struct trace_ksym *entry; | ||
283 | struct hlist_node *node; | ||
284 | char *input_string, *buf, *ksymname = NULL; | ||
285 | unsigned long ksym_addr = 0; | ||
286 | int ret, op, changed = 0; | ||
287 | |||
288 | input_string = kzalloc(count + 1, GFP_KERNEL); | ||
289 | if (!input_string) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | if (copy_from_user(input_string, buffer, count)) { | ||
293 | kfree(input_string); | ||
294 | return -EFAULT; | ||
295 | } | ||
296 | input_string[count] = '\0'; | ||
297 | |||
298 | buf = strstrip(input_string); | ||
299 | |||
300 | /* | ||
301 | * Clear all breakpoints if: | ||
302 | * 1: echo > ksym_trace_filter | ||
303 | * 2: echo 0 > ksym_trace_filter | ||
304 | * 3: echo "*:---" > ksym_trace_filter | ||
305 | */ | ||
306 | if (!buf[0] || !strcmp(buf, "0") || | ||
307 | !strcmp(buf, "*:---")) { | ||
308 | __ksym_trace_reset(); | ||
309 | kfree(input_string); | ||
310 | return count; | ||
311 | } | ||
312 | |||
313 | ret = op = parse_ksym_trace_str(buf, &ksymname, &ksym_addr); | ||
314 | if (ret < 0) { | ||
315 | kfree(input_string); | ||
316 | return ret; | ||
317 | } | ||
318 | |||
319 | mutex_lock(&ksym_tracer_mutex); | ||
320 | |||
321 | ret = -EINVAL; | ||
322 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | ||
323 | if (entry->ksym_addr == ksym_addr) { | ||
324 | /* Check for malformed request: (6) */ | ||
325 | if (entry->type != op) | ||
326 | changed = 1; | ||
327 | else | ||
328 | goto out; | ||
329 | break; | ||
330 | } | ||
331 | } | ||
332 | if (changed) { | ||
333 | unregister_wide_hw_breakpoint(entry->ksym_hbp); | ||
334 | entry->type = op; | ||
335 | ret = 0; | ||
336 | if (op > 0) { | ||
337 | entry->ksym_hbp = | ||
338 | register_wide_hw_breakpoint(entry->ksym_addr, | ||
339 | entry->len, entry->type, | ||
340 | ksym_hbp_handler, true); | ||
341 | if (IS_ERR(entry->ksym_hbp)) | ||
342 | ret = PTR_ERR(entry->ksym_hbp); | ||
343 | else | ||
344 | goto out; | ||
345 | } | ||
346 | /* Error or "symbol:---" case: drop the entry */ | ||
347 | ksym_filter_entry_count--; | ||
348 | hlist_del_rcu(&(entry->ksym_hlist)); | ||
349 | synchronize_rcu(); | ||
350 | kfree(entry); | ||
351 | } else { | ||
352 | /* Check for malformed request: (4) */ | ||
353 | if (op == 0) | ||
354 | goto out; | ||
355 | ret = process_new_ksym_entry(ksymname, op, ksym_addr); | ||
356 | } | ||
357 | out: | ||
358 | mutex_unlock(&ksym_tracer_mutex); | ||
359 | |||
360 | kfree(input_string); | ||
361 | |||
362 | if (!ret) | ||
363 | ret = count; | ||
364 | return ret; | ||
365 | } | ||
366 | |||
367 | static const struct file_operations ksym_tracing_fops = { | ||
368 | .open = tracing_open_generic, | ||
369 | .read = ksym_trace_filter_read, | ||
370 | .write = ksym_trace_filter_write, | ||
371 | }; | ||
372 | |||
373 | static void ksym_trace_reset(struct trace_array *tr) | ||
374 | { | ||
375 | ksym_tracing_enabled = 0; | ||
376 | __ksym_trace_reset(); | ||
377 | } | ||
378 | |||
379 | static int ksym_trace_init(struct trace_array *tr) | ||
380 | { | ||
381 | int cpu, ret = 0; | ||
382 | |||
383 | for_each_online_cpu(cpu) | ||
384 | tracing_reset(tr, cpu); | ||
385 | ksym_tracing_enabled = 1; | ||
386 | ksym_trace_array = tr; | ||
387 | |||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | static void ksym_trace_print_header(struct seq_file *m) | ||
392 | { | ||
393 | seq_puts(m, | ||
394 | "# TASK-PID CPU# Symbol " | ||
395 | "Type Function\n"); | ||
396 | seq_puts(m, | ||
397 | "# | | | " | ||
398 | " | |\n"); | ||
399 | } | ||
400 | |||
401 | static enum print_line_t ksym_trace_output(struct trace_iterator *iter) | ||
402 | { | ||
403 | struct trace_entry *entry = iter->ent; | ||
404 | struct trace_seq *s = &iter->seq; | ||
405 | struct ksym_trace_entry *field; | ||
406 | char str[KSYM_SYMBOL_LEN]; | ||
407 | int ret; | ||
408 | |||
409 | if (entry->type != TRACE_KSYM) | ||
410 | return TRACE_TYPE_UNHANDLED; | ||
411 | |||
412 | trace_assign_type(field, entry); | ||
413 | |||
414 | ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, | ||
415 | entry->pid, iter->cpu, (void *)field->addr); | ||
416 | if (!ret) | ||
417 | return TRACE_TYPE_PARTIAL_LINE; | ||
418 | |||
419 | switch (field->type) { | ||
420 | case HW_BREAKPOINT_R: | ||
421 | ret = trace_seq_printf(s, " R "); | ||
422 | break; | ||
423 | case HW_BREAKPOINT_W: | ||
424 | ret = trace_seq_printf(s, " W "); | ||
425 | break; | ||
426 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
427 | ret = trace_seq_printf(s, " RW "); | ||
428 | break; | ||
429 | default: | ||
430 | return TRACE_TYPE_PARTIAL_LINE; | ||
431 | } | ||
432 | |||
433 | if (!ret) | ||
434 | return TRACE_TYPE_PARTIAL_LINE; | ||
435 | |||
436 | sprint_symbol(str, field->ip); | ||
437 | ret = trace_seq_printf(s, "%s\n", str); | ||
438 | if (!ret) | ||
439 | return TRACE_TYPE_PARTIAL_LINE; | ||
440 | |||
441 | return TRACE_TYPE_HANDLED; | ||
442 | } | ||
443 | |||
444 | struct tracer ksym_tracer __read_mostly = | ||
445 | { | ||
446 | .name = "ksym_tracer", | ||
447 | .init = ksym_trace_init, | ||
448 | .reset = ksym_trace_reset, | ||
449 | #ifdef CONFIG_FTRACE_SELFTEST | ||
450 | .selftest = trace_selftest_startup_ksym, | ||
451 | #endif | ||
452 | .print_header = ksym_trace_print_header, | ||
453 | .print_line = ksym_trace_output | ||
454 | }; | ||
455 | |||
456 | __init static int init_ksym_trace(void) | ||
457 | { | ||
458 | struct dentry *d_tracer; | ||
459 | struct dentry *entry; | ||
460 | |||
461 | d_tracer = tracing_init_dentry(); | ||
462 | ksym_filter_entry_count = 0; | ||
463 | |||
464 | entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, | ||
465 | NULL, &ksym_tracing_fops); | ||
466 | if (!entry) | ||
467 | pr_warning("Could not create debugfs " | ||
468 | "'ksym_trace_filter' file\n"); | ||
469 | |||
470 | return register_tracer(&ksym_tracer); | ||
471 | } | ||
472 | device_initcall(init_ksym_trace); | ||
473 | |||
474 | |||
475 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
476 | static int ksym_tracer_stat_headers(struct seq_file *m) | ||
477 | { | ||
478 | seq_puts(m, " Access Type "); | ||
479 | seq_puts(m, " Symbol Counter\n"); | ||
480 | seq_puts(m, " ----------- "); | ||
481 | seq_puts(m, " ------ -------\n"); | ||
482 | return 0; | ||
483 | } | ||
484 | |||
485 | static int ksym_tracer_stat_show(struct seq_file *m, void *v) | ||
486 | { | ||
487 | struct hlist_node *stat = v; | ||
488 | struct trace_ksym *entry; | ||
489 | int access_type = 0; | ||
490 | char fn_name[KSYM_NAME_LEN]; | ||
491 | |||
492 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); | ||
493 | |||
494 | access_type = entry->type; | ||
495 | |||
496 | switch (access_type) { | ||
497 | case HW_BREAKPOINT_R: | ||
498 | seq_puts(m, " R "); | ||
499 | break; | ||
500 | case HW_BREAKPOINT_W: | ||
501 | seq_puts(m, " W "); | ||
502 | break; | ||
503 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
504 | seq_puts(m, " RW "); | ||
505 | break; | ||
506 | default: | ||
507 | seq_puts(m, " NA "); | ||
508 | } | ||
509 | |||
510 | if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) | ||
511 | seq_printf(m, " %-36s", fn_name); | ||
512 | else | ||
513 | seq_printf(m, " %-36s", "<NA>"); | ||
514 | seq_printf(m, " %15lu\n", entry->counter); | ||
515 | |||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static void *ksym_tracer_stat_start(struct tracer_stat *trace) | ||
520 | { | ||
521 | return ksym_filter_head.first; | ||
522 | } | ||
523 | |||
524 | static void * | ||
525 | ksym_tracer_stat_next(void *v, int idx) | ||
526 | { | ||
527 | struct hlist_node *stat = v; | ||
528 | |||
529 | return stat->next; | ||
530 | } | ||
531 | |||
532 | static struct tracer_stat ksym_tracer_stats = { | ||
533 | .name = "ksym_tracer", | ||
534 | .stat_start = ksym_tracer_stat_start, | ||
535 | .stat_next = ksym_tracer_stat_next, | ||
536 | .stat_headers = ksym_tracer_stat_headers, | ||
537 | .stat_show = ksym_tracer_stat_show | ||
538 | }; | ||
539 | |||
540 | __init static int ksym_tracer_stat_init(void) | ||
541 | { | ||
542 | int ret; | ||
543 | |||
544 | ret = register_stat_tracer(&ksym_tracer_stats); | ||
545 | if (ret) { | ||
546 | printk(KERN_WARNING "Warning: could not register " | ||
547 | "ksym tracer stats\n"); | ||
548 | return 1; | ||
549 | } | ||
550 | |||
551 | return 0; | ||
552 | } | ||
553 | fs_initcall(ksym_tracer_stat_init); | ||
554 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
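The ksym_trace_filter file registered above accepts strings in the <ksym_name>:<op> format parsed by parse_ksym_trace_str(). Below is a minimal user-space sketch of driving it; it assumes debugfs is mounted at /sys/kernel/debug (so the tracing directory created via tracing_init_dentry() appears at /sys/kernel/debug/tracing), and the symbol pid_max plus the rw- access string are only illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define FILTER "/sys/kernel/debug/tracing/ksym_trace_filter"

int main(void)
{
	char buf[256];
	ssize_t n;
	int fd;

	/* Arm a read/write breakpoint on pid_max: equivalent to
	 * "echo pid_max:rw- > ksym_trace_filter". */
	fd = open(FILTER, O_WRONLY);
	if (fd < 0) {
		perror("open(" FILTER ")");
		return 1;
	}
	if (write(fd, "pid_max:rw-", strlen("pid_max:rw-")) < 0)
		perror("write");	/* e.g. ENOSPC once KSYM_TRACER_MAX entries exist */
	close(fd);

	/* Read the active filters back; expect a line like "pid_max:rw-". */
	fd = open(FILTER, O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}

Writing an empty string, "0", or "*:---" instead clears every breakpoint, per the rules in ksym_trace_filter_write() above.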
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index d2cdbabb4ead..dc98309e839a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) | |||
17 | case TRACE_GRAPH_ENT: | 17 | case TRACE_GRAPH_ENT: |
18 | case TRACE_GRAPH_RET: | 18 | case TRACE_GRAPH_RET: |
19 | case TRACE_HW_BRANCHES: | 19 | case TRACE_HW_BRANCHES: |
20 | case TRACE_KSYM: | ||
20 | return 1; | 21 | return 1; |
21 | } | 22 | } |
22 | return 0; | 23 | return 0; |
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace, | |||
808 | return ret; | 809 | return ret; |
809 | } | 810 | } |
810 | #endif /* CONFIG_HW_BRANCH_TRACER */ | 811 | #endif /* CONFIG_HW_BRANCH_TRACER */ |
812 | |||
813 | #ifdef CONFIG_KSYM_TRACER | ||
814 | static int ksym_selftest_dummy; | ||
815 | |||
816 | int | ||
817 | trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) | ||
818 | { | ||
819 | unsigned long count; | ||
820 | int ret; | ||
821 | |||
822 | /* start the tracing */ | ||
823 | ret = tracer_init(trace, tr); | ||
824 | if (ret) { | ||
825 | warn_failed_init_tracer(trace, ret); | ||
826 | return ret; | ||
827 | } | ||
828 | |||
829 | ksym_selftest_dummy = 0; | ||
830 | |||
831 | /* Register the read-write tracing request */ | ||
832 | ret = process_new_ksym_entry("ksym_selftest_dummy", | ||
833 | HW_BREAKPOINT_R | HW_BREAKPOINT_W, | ||
834 | (unsigned long)(&ksym_selftest_dummy)); | ||
835 | |||
836 | if (ret < 0) { | ||
837 | printk(KERN_CONT "ksym_trace read-write startup test failed\n"); | ||
838 | goto ret_path; | ||
839 | } | ||
840 | /* Perform a read and a write on the dummy variable to | ||
841 | * trigger the tracer | ||
842 | */ | ||
843 | if (ksym_selftest_dummy == 0) | ||
844 | ksym_selftest_dummy++; | ||
845 | |||
846 | /* stop the tracing. */ | ||
847 | tracing_stop(); | ||
848 | /* check the trace buffer */ | ||
849 | ret = trace_test_buffer(tr, &count); | ||
850 | trace->reset(tr); | ||
851 | tracing_start(); | ||
852 | |||
853 | /* One read and one write were performed on the dummy variable, | ||
854 | * so the trace buffer should hold exactly two entries | ||
855 | */ | ||
856 | if (!ret && count != 2) { | ||
857 | printk(KERN_CONT "Ksym tracer startup test failed"); | ||
858 | ret = -1; | ||
859 | } | ||
860 | |||
861 | ret_path: | ||
862 | return ret; | ||
863 | } | ||
864 | #endif /* CONFIG_KSYM_TRACER */ | ||
865 | |||
diff --git a/samples/Kconfig b/samples/Kconfig index b92bde3c6a89..e4be84ac3d38 100644 --- a/samples/Kconfig +++ b/samples/Kconfig | |||
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES | |||
40 | default m | 40 | default m |
41 | depends on SAMPLE_KPROBES && KRETPROBES | 41 | depends on SAMPLE_KPROBES && KRETPROBES |
42 | 42 | ||
43 | config SAMPLE_HW_BREAKPOINT | ||
44 | tristate "Build kernel hardware breakpoint examples -- loadable module only" | ||
45 | depends on HAVE_HW_BREAKPOINT && m | ||
46 | help | ||
47 | This builds kernel hardware breakpoint example modules. | ||
48 | |||
43 | endif # SAMPLES | 49 | endif # SAMPLES |
44 | 50 | ||
diff --git a/samples/Makefile b/samples/Makefile index 43343a03b1f4..0f15e6d77fd6 100644 --- a/samples/Makefile +++ b/samples/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | # Makefile for Linux samples code | 1 | # Makefile for Linux samples code |
2 | 2 | ||
3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ | 3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \ |
4 | hw_breakpoint/ | ||
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile new file mode 100644 index 000000000000..0f5c31c2fc47 --- /dev/null +++ b/samples/hw_breakpoint/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o | |||
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c new file mode 100644 index 000000000000..5bc9819a819e --- /dev/null +++ b/samples/hw_breakpoint/data_breakpoint.c | |||
@@ -0,0 +1,88 @@ | |||
1 | /* | ||
2 | * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * usage: insmod data_breakpoint.ko ksym=<ksym_name> | ||
19 | * | ||
20 | * This file is a kernel module that places a breakpoint on the ksym_name | ||
21 | * kernel variable using a hardware breakpoint register. The corresponding | ||
22 | * handler, which prints a backtrace, is invoked every time a write operation | ||
23 | * is performed on that variable. | ||
24 | * | ||
25 | * Copyright (C) IBM Corporation, 2009 | ||
26 | */ | ||
27 | #include <linux/module.h> /* Needed by all modules */ | ||
28 | #include <linux/kernel.h> /* Needed for KERN_INFO */ | ||
29 | #include <linux/init.h> /* Needed for the macros */ | ||
30 | #include <linux/kallsyms.h> | ||
31 | |||
32 | #include <linux/perf_event.h> | ||
33 | #include <linux/hw_breakpoint.h> | ||
34 | |||
35 | static struct perf_event **sample_hbp; | ||
36 | |||
37 | static char ksym_name[KSYM_NAME_LEN] = "pid_max"; | ||
38 | module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); | ||
39 | MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" | ||
40 | " write operations on the kernel symbol"); | ||
41 | |||
42 | static void sample_hbp_handler(struct perf_event *temp, void *data) | ||
43 | { | ||
44 | printk(KERN_INFO "%s value changed\n", ksym_name); | ||
45 | dump_stack(); | ||
46 | printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); | ||
47 | } | ||
48 | |||
49 | static int __init hw_break_module_init(void) | ||
50 | { | ||
51 | int ret; | ||
52 | unsigned long addr; | ||
53 | |||
54 | addr = kallsyms_lookup_name(ksym_name); | ||
55 | |||
56 | sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4, | ||
57 | HW_BREAKPOINT_W | HW_BREAKPOINT_R, | ||
58 | sample_hbp_handler, true); | ||
59 | if (IS_ERR(sample_hbp)) { | ||
60 | ret = PTR_ERR(sample_hbp); | ||
61 | goto fail; | ||
62 | } else if (!sample_hbp) { | ||
63 | ret = -EINVAL; | ||
64 | goto fail; | ||
65 | } | ||
66 | |||
67 | printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); | ||
68 | |||
69 | return 0; | ||
70 | |||
71 | fail: | ||
72 | printk(KERN_INFO "Breakpoint registration failed\n"); | ||
73 | |||
74 | return ret; | ||
75 | } | ||
76 | |||
77 | static void __exit hw_break_module_exit(void) | ||
78 | { | ||
79 | unregister_wide_hw_breakpoint(sample_hbp); | ||
80 | printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); | ||
81 | } | ||
82 | |||
83 | module_init(hw_break_module_init); | ||
84 | module_exit(hw_break_module_exit); | ||
85 | |||
86 | MODULE_LICENSE("GPL"); | ||
87 | MODULE_AUTHOR("K.Prasad"); | ||
88 | MODULE_DESCRIPTION("ksym breakpoint"); | ||
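A quick way to exercise this sample, assuming it was loaded with the default ksym=pid_max: write the pid_max sysctl from user space, which makes the kernel store to the watched variable and fires the handler above (look for the handler's message and backtrace in the kernel log). The sysctl path and value here are only an example:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Any write to /proc/sys/kernel/pid_max ends up as a kernel store
	 * to the pid_max variable watched by data_breakpoint.ko. */
	const char val[] = "32768";
	int fd = open("/proc/sys/kernel/pid_max", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/sys/kernel/pid_max");
		return 1;
	}
	if (write(fd, val, sizeof(val) - 1) < 0)
		perror("write");
	close(fd);
	return 0;	/* now check dmesg for the handler's output */
}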