diff options
33 files changed, 1985 insertions, 195 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 78a35e9dc104..1adf2d0e6356 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -112,3 +112,7 @@ config HAVE_DMA_API_DEBUG | |||
112 | 112 | ||
113 | config HAVE_DEFAULT_NO_SPIN_MUTEXES | 113 | config HAVE_DEFAULT_NO_SPIN_MUTEXES |
114 | bool | 114 | bool |
115 | |||
116 | config HAVE_HW_BREAKPOINT | ||
117 | bool | ||
118 | |||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cf42fc305419..52421d52f21e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -46,6 +46,7 @@ config X86 | |||
46 | select HAVE_KERNEL_GZIP | 46 | select HAVE_KERNEL_GZIP |
47 | select HAVE_KERNEL_BZIP2 | 47 | select HAVE_KERNEL_BZIP2 |
48 | select HAVE_KERNEL_LZMA | 48 | select HAVE_KERNEL_LZMA |
49 | select HAVE_HW_BREAKPOINT | ||
49 | select HAVE_ARCH_KMEMCHECK | 50 | select HAVE_ARCH_KMEMCHECK |
50 | 51 | ||
51 | config OUTPUT_FORMAT | 52 | config OUTPUT_FORMAT |
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa84..fc4685dd6e4d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h | |||
@@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) | |||
32 | >> PAGE_SHIFT; | 32 | >> PAGE_SHIFT; |
33 | dump->u_dsize -= dump->u_tsize; | 33 | dump->u_dsize -= dump->u_tsize; |
34 | dump->u_ssize = 0; | 34 | dump->u_ssize = 0; |
35 | dump->u_debugreg[0] = current->thread.debugreg0; | 35 | dump->u_debugreg[0] = current->thread.debugreg[0]; |
36 | dump->u_debugreg[1] = current->thread.debugreg1; | 36 | dump->u_debugreg[1] = current->thread.debugreg[1]; |
37 | dump->u_debugreg[2] = current->thread.debugreg2; | 37 | dump->u_debugreg[2] = current->thread.debugreg[2]; |
38 | dump->u_debugreg[3] = current->thread.debugreg3; | 38 | dump->u_debugreg[3] = current->thread.debugreg[3]; |
39 | dump->u_debugreg[4] = 0; | 39 | dump->u_debugreg[4] = 0; |
40 | dump->u_debugreg[5] = 0; | 40 | dump->u_debugreg[5] = 0; |
41 | dump->u_debugreg[6] = current->thread.debugreg6; | 41 | dump->u_debugreg[6] = current->thread.debugreg6; |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e2..23439fbb1d0e 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #define DR_TRAP1 (0x2) /* db1 */ | 18 | #define DR_TRAP1 (0x2) /* db1 */ |
19 | #define DR_TRAP2 (0x4) /* db2 */ | 19 | #define DR_TRAP2 (0x4) /* db2 */ |
20 | #define DR_TRAP3 (0x8) /* db3 */ | 20 | #define DR_TRAP3 (0x8) /* db3 */ |
21 | #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) | ||
21 | 22 | ||
22 | #define DR_STEP (0x4000) /* single-step */ | 23 | #define DR_STEP (0x4000) /* single-step */ |
23 | #define DR_SWITCH (0x8000) /* task switch */ | 24 | #define DR_SWITCH (0x8000) /* task switch */ |
@@ -49,6 +50,8 @@ | |||
49 | 50 | ||
50 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ | 51 | #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ |
51 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ | 52 | #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ |
53 | #define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ | ||
54 | #define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ | ||
52 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ | 55 | #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ |
53 | 56 | ||
54 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ | 57 | #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ |
@@ -67,4 +70,30 @@ | |||
67 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ | 70 | #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ |
68 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ | 71 | #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ |
69 | 72 | ||
73 | /* | ||
74 | * HW breakpoint additions | ||
75 | */ | ||
76 | #ifdef __KERNEL__ | ||
77 | |||
78 | /* For process management */ | ||
79 | extern void flush_thread_hw_breakpoint(struct task_struct *tsk); | ||
80 | extern int copy_thread_hw_breakpoint(struct task_struct *tsk, | ||
81 | struct task_struct *child, unsigned long clone_flags); | ||
82 | |||
83 | /* For CPU management */ | ||
84 | extern void load_debug_registers(void); | ||
/*
 * Switch off all hardware breakpoints on the executing CPU: DR7 (control)
 * is cleared first so nothing can fire, then address registers DR0-DR3
 * are zeroed in ascending order.
 */
static inline void hw_breakpoint_disable(void)
{
	int regno;

	/* Control register first: disables every breakpoint at once */
	set_debugreg(0UL, 7);

	/* Then wipe the four breakpoint address registers */
	for (regno = 0; regno <= 3; regno++)
		set_debugreg(0UL, regno);
}
96 | |||
97 | #endif /* __KERNEL__ */ | ||
98 | |||
70 | #endif /* _ASM_X86_DEBUGREG_H */ | 99 | #endif /* _ASM_X86_DEBUGREG_H */ |
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 000000000000..1acb4d45de70 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -0,0 +1,55 @@ | |||
1 | #ifndef _I386_HW_BREAKPOINT_H | ||
2 | #define _I386_HW_BREAKPOINT_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #define __ARCH_HW_BREAKPOINT_H | ||
6 | |||
7 | struct arch_hw_breakpoint { | ||
8 | char *name; /* Contains name of the symbol to set bkpt */ | ||
9 | unsigned long address; | ||
10 | u8 len; | ||
11 | u8 type; | ||
12 | }; | ||
13 | |||
14 | #include <linux/kdebug.h> | ||
15 | #include <asm-generic/hw_breakpoint.h> | ||
16 | |||
17 | /* Available HW breakpoint length encodings */ | ||
18 | #define HW_BREAKPOINT_LEN_1 0x40 | ||
19 | #define HW_BREAKPOINT_LEN_2 0x44 | ||
20 | #define HW_BREAKPOINT_LEN_4 0x4c | ||
21 | #define HW_BREAKPOINT_LEN_EXECUTE 0x40 | ||
22 | |||
23 | #ifdef CONFIG_X86_64 | ||
24 | #define HW_BREAKPOINT_LEN_8 0x48 | ||
25 | #endif | ||
26 | |||
27 | /* Available HW breakpoint type encodings */ | ||
28 | |||
29 | /* trigger on instruction execute */ | ||
30 | #define HW_BREAKPOINT_EXECUTE 0x80 | ||
31 | /* trigger on memory write */ | ||
32 | #define HW_BREAKPOINT_WRITE 0x81 | ||
33 | /* trigger on memory read or write */ | ||
34 | #define HW_BREAKPOINT_RW 0x83 | ||
35 | |||
36 | /* Total number of available HW breakpoint registers */ | ||
37 | #define HBP_NUM 4 | ||
38 | |||
39 | extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; | ||
40 | DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); | ||
41 | extern unsigned int hbp_user_refcount[HBP_NUM]; | ||
42 | |||
43 | extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); | ||
44 | extern void arch_uninstall_thread_hw_breakpoint(void); | ||
45 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | ||
46 | extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, | ||
47 | struct task_struct *tsk); | ||
48 | extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); | ||
49 | extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); | ||
50 | extern void arch_update_kernel_hw_breakpoint(void *); | ||
51 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | ||
52 | unsigned long val, void *data); | ||
53 | #endif /* __KERNEL__ */ | ||
54 | #endif /* _I386_HW_BREAKPOINT_H */ | ||
55 | |||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7768269b1cf..2b03f700d3f2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -29,6 +29,7 @@ struct mm_struct; | |||
29 | #include <linux/threads.h> | 29 | #include <linux/threads.h> |
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | 31 | ||
32 | #define HBP_NUM 4 | ||
32 | /* | 33 | /* |
33 | * Default implementation of macro that returns current | 34 | * Default implementation of macro that returns current |
34 | * instruction pointer ("program counter"). | 35 | * instruction pointer ("program counter"). |
@@ -433,12 +434,11 @@ struct thread_struct { | |||
433 | #endif | 434 | #endif |
434 | unsigned long gs; | 435 | unsigned long gs; |
435 | /* Hardware debugging registers: */ | 436 | /* Hardware debugging registers: */ |
436 | unsigned long debugreg0; | 437 | unsigned long debugreg[HBP_NUM]; |
437 | unsigned long debugreg1; | ||
438 | unsigned long debugreg2; | ||
439 | unsigned long debugreg3; | ||
440 | unsigned long debugreg6; | 438 | unsigned long debugreg6; |
441 | unsigned long debugreg7; | 439 | unsigned long debugreg7; |
440 | /* Hardware breakpoint info */ | ||
441 | struct hw_breakpoint *hbp[HBP_NUM]; | ||
442 | /* Fault info: */ | 442 | /* Fault info: */ |
443 | unsigned long cr2; | 443 | unsigned long cr2; |
444 | unsigned long trap_no; | 444 | unsigned long trap_no; |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f3477bb84566..b67efd1cf59b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
36 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 36 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
37 | obj-y += bootflag.o e820.o | 37 | obj-y += bootflag.o e820.o |
38 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 38 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
39 | obj-y += alternative.o i8253.o pci-nommu.o | 39 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
40 | obj-y += tsc.o io_delay.o rtc.o | 40 | obj-y += tsc.o io_delay.o rtc.o |
41 | 41 | ||
42 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 42 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..51d959528b1d --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,391 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) 2009 IBM Corporation | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
22 | * using the CPU's debug registers. | ||
23 | */ | ||
24 | |||
25 | #include <linux/irqflags.h> | ||
26 | #include <linux/notifier.h> | ||
27 | #include <linux/kallsyms.h> | ||
28 | #include <linux/kprobes.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/kdebug.h> | ||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/smp.h> | ||
36 | |||
37 | #include <asm/hw_breakpoint.h> | ||
38 | #include <asm/processor.h> | ||
39 | #include <asm/debugreg.h> | ||
40 | |||
41 | /* Unmasked kernel DR7 value */ | ||
42 | static unsigned long kdr7; | ||
43 | |||
44 | /* | ||
45 | * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. | ||
46 | * Used to clear and verify the status of bits corresponding to DR0 - DR3 | ||
47 | */ | ||
48 | static const unsigned long dr7_masks[HBP_NUM] = { | ||
49 | 0x000f0003, /* LEN0, R/W0, G0, L0 */ | ||
50 | 0x00f0000c, /* LEN1, R/W1, G1, L1 */ | ||
51 | 0x0f000030, /* LEN2, R/W2, G2, L2 */ | ||
52 | 0xf00000c0 /* LEN3, R/W3, G3, L3 */ | ||
53 | }; | ||
54 | |||
55 | |||
56 | /* | ||
57 | * Encode the length, type, Exact, and Enable bits for a particular breakpoint | ||
58 | * as stored in debug register 7. | ||
59 | */ | ||
60 | static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
61 | { | ||
62 | unsigned long bp_info; | ||
63 | |||
64 | bp_info = (len | type) & 0xf; | ||
65 | bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); | ||
66 | bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | | ||
67 | DR_GLOBAL_SLOWDOWN; | ||
68 | return bp_info; | ||
69 | } | ||
70 | |||
71 | void arch_update_kernel_hw_breakpoint(void *unused) | ||
72 | { | ||
73 | struct hw_breakpoint *bp; | ||
74 | int i, cpu = get_cpu(); | ||
75 | unsigned long temp_kdr7 = 0; | ||
76 | |||
77 | /* Don't allow debug exceptions while we update the registers */ | ||
78 | set_debugreg(0UL, 7); | ||
79 | |||
80 | for (i = hbp_kernel_pos; i < HBP_NUM; i++) { | ||
81 | per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; | ||
82 | if (bp) { | ||
83 | temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); | ||
84 | set_debugreg(bp->info.address, i); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /* No need to set DR6. Update the debug registers with kernel-space | ||
89 | * breakpoint values from kdr7 and user-space requests from the | ||
90 | * current process | ||
91 | */ | ||
92 | kdr7 = temp_kdr7; | ||
93 | set_debugreg(kdr7 | current->thread.debugreg7, 7); | ||
94 | put_cpu(); | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Install the thread breakpoints in their debug registers. | ||
99 | */ | ||
100 | void arch_install_thread_hw_breakpoint(struct task_struct *tsk) | ||
101 | { | ||
102 | struct thread_struct *thread = &(tsk->thread); | ||
103 | |||
104 | switch (hbp_kernel_pos) { | ||
105 | case 4: | ||
106 | set_debugreg(thread->debugreg[3], 3); | ||
107 | case 3: | ||
108 | set_debugreg(thread->debugreg[2], 2); | ||
109 | case 2: | ||
110 | set_debugreg(thread->debugreg[1], 1); | ||
111 | case 1: | ||
112 | set_debugreg(thread->debugreg[0], 0); | ||
113 | default: | ||
114 | break; | ||
115 | } | ||
116 | |||
117 | /* No need to set DR6 */ | ||
118 | set_debugreg((kdr7 | thread->debugreg7), 7); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Install the debug register values for just the kernel, no thread. | ||
123 | */ | ||
124 | void arch_uninstall_thread_hw_breakpoint() | ||
125 | { | ||
126 | /* Clear the user-space portion of debugreg7 by setting only kdr7 */ | ||
127 | set_debugreg(kdr7, 7); | ||
128 | |||
129 | } | ||
130 | |||
131 | static int get_hbp_len(u8 hbp_len) | ||
132 | { | ||
133 | unsigned int len_in_bytes = 0; | ||
134 | |||
135 | switch (hbp_len) { | ||
136 | case HW_BREAKPOINT_LEN_1: | ||
137 | len_in_bytes = 1; | ||
138 | break; | ||
139 | case HW_BREAKPOINT_LEN_2: | ||
140 | len_in_bytes = 2; | ||
141 | break; | ||
142 | case HW_BREAKPOINT_LEN_4: | ||
143 | len_in_bytes = 4; | ||
144 | break; | ||
145 | #ifdef CONFIG_X86_64 | ||
146 | case HW_BREAKPOINT_LEN_8: | ||
147 | len_in_bytes = 8; | ||
148 | break; | ||
149 | #endif | ||
150 | } | ||
151 | return len_in_bytes; | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * Check for virtual address in user space. | ||
156 | */ | ||
157 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
158 | { | ||
159 | unsigned int len; | ||
160 | |||
161 | len = get_hbp_len(hbp_len); | ||
162 | |||
163 | return (va <= TASK_SIZE - len); | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Check for virtual address in kernel space. | ||
168 | */ | ||
169 | int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | ||
170 | { | ||
171 | unsigned int len; | ||
172 | |||
173 | len = get_hbp_len(hbp_len); | ||
174 | |||
175 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | ||
176 | } | ||
177 | |||
178 | /* | ||
179 | * Store a breakpoint's encoded address, length, and type. | ||
180 | */ | ||
181 | static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) | ||
182 | { | ||
183 | /* | ||
184 | * User-space requests will always have the address field populated | ||
185 | * Symbol names from user-space are rejected | ||
186 | */ | ||
187 | if (tsk && bp->info.name) | ||
188 | return -EINVAL; | ||
189 | /* | ||
190 | * For kernel-addresses, either the address or symbol name can be | ||
191 | * specified. | ||
192 | */ | ||
193 | if (bp->info.name) | ||
194 | bp->info.address = (unsigned long) | ||
195 | kallsyms_lookup_name(bp->info.name); | ||
196 | if (bp->info.address) | ||
197 | return 0; | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * Validate the arch-specific HW Breakpoint register settings | ||
203 | */ | ||
204 | int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, | ||
205 | struct task_struct *tsk) | ||
206 | { | ||
207 | unsigned int align; | ||
208 | int ret = -EINVAL; | ||
209 | |||
210 | switch (bp->info.type) { | ||
211 | /* | ||
212 | * Ptrace-refactoring code | ||
213 | * For now, we'll allow instruction breakpoint only for user-space | ||
214 | * addresses | ||
215 | */ | ||
216 | case HW_BREAKPOINT_EXECUTE: | ||
217 | if ((!arch_check_va_in_userspace(bp->info.address, | ||
218 | bp->info.len)) && | ||
219 | bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) | ||
220 | return ret; | ||
221 | break; | ||
222 | case HW_BREAKPOINT_WRITE: | ||
223 | break; | ||
224 | case HW_BREAKPOINT_RW: | ||
225 | break; | ||
226 | default: | ||
227 | return ret; | ||
228 | } | ||
229 | |||
230 | switch (bp->info.len) { | ||
231 | case HW_BREAKPOINT_LEN_1: | ||
232 | align = 0; | ||
233 | break; | ||
234 | case HW_BREAKPOINT_LEN_2: | ||
235 | align = 1; | ||
236 | break; | ||
237 | case HW_BREAKPOINT_LEN_4: | ||
238 | align = 3; | ||
239 | break; | ||
240 | #ifdef CONFIG_X86_64 | ||
241 | case HW_BREAKPOINT_LEN_8: | ||
242 | align = 7; | ||
243 | break; | ||
244 | #endif | ||
245 | default: | ||
246 | return ret; | ||
247 | } | ||
248 | |||
249 | if (bp->triggered) | ||
250 | ret = arch_store_info(bp, tsk); | ||
251 | |||
252 | if (ret < 0) | ||
253 | return ret; | ||
254 | /* | ||
255 | * Check that the low-order bits of the address are appropriate | ||
256 | * for the alignment implied by len. | ||
257 | */ | ||
258 | if (bp->info.address & align) | ||
259 | return -EINVAL; | ||
260 | |||
261 | /* Check that the virtual address is in the proper range */ | ||
262 | if (tsk) { | ||
263 | if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) | ||
264 | return -EFAULT; | ||
265 | } else { | ||
266 | if (!arch_check_va_in_kernelspace(bp->info.address, | ||
267 | bp->info.len)) | ||
268 | return -EFAULT; | ||
269 | } | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) | ||
274 | { | ||
275 | struct thread_struct *thread = &(tsk->thread); | ||
276 | struct hw_breakpoint *bp = thread->hbp[pos]; | ||
277 | |||
278 | thread->debugreg7 &= ~dr7_masks[pos]; | ||
279 | if (bp) { | ||
280 | thread->debugreg[pos] = bp->info.address; | ||
281 | thread->debugreg7 |= encode_dr7(pos, bp->info.len, | ||
282 | bp->info.type); | ||
283 | } else | ||
284 | thread->debugreg[pos] = 0; | ||
285 | } | ||
286 | |||
287 | void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) | ||
288 | { | ||
289 | int i; | ||
290 | struct thread_struct *thread = &(tsk->thread); | ||
291 | |||
292 | thread->debugreg7 = 0; | ||
293 | for (i = 0; i < HBP_NUM; i++) | ||
294 | thread->debugreg[i] = 0; | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * Handle debug exception notifications. | ||
299 | * | ||
300 | * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. | ||
301 | * | ||
302 | * NOTIFY_DONE returned if one of the following conditions is true. | ||
303 | * i) When the causative address is from user-space and the exception | ||
304 | * is a valid one, i.e. not triggered as a result of lazy debug register | ||
305 | * switching | ||
306 | * ii) When there are more bits than trap<n> set in DR6 register (such | ||
307 | * as BD, BS or BT) indicating that more than one debug condition is | ||
308 | * met and requires some more action in do_debug(). | ||
309 | * | ||
310 | * NOTIFY_STOP returned for all other cases | ||
311 | * | ||
312 | */ | ||
313 | int __kprobes hw_breakpoint_handler(struct die_args *args) | ||
314 | { | ||
315 | int i, cpu, rc = NOTIFY_STOP; | ||
316 | struct hw_breakpoint *bp; | ||
317 | unsigned long dr7, dr6; | ||
318 | unsigned long *dr6_p; | ||
319 | |||
320 | /* The DR6 value is pointed by args->err */ | ||
321 | dr6_p = (unsigned long *)ERR_PTR(args->err); | ||
322 | dr6 = *dr6_p; | ||
323 | |||
324 | /* Do an early return if no trap bits are set in DR6 */ | ||
325 | if ((dr6 & DR_TRAP_BITS) == 0) | ||
326 | return NOTIFY_DONE; | ||
327 | |||
328 | /* Lazy debug register switching */ | ||
329 | if (!test_tsk_thread_flag(current, TIF_DEBUG)) | ||
330 | arch_uninstall_thread_hw_breakpoint(); | ||
331 | |||
332 | get_debugreg(dr7, 7); | ||
333 | /* Disable breakpoints during exception handling */ | ||
334 | set_debugreg(0UL, 7); | ||
335 | /* | ||
336 | * Assert that local interrupts are disabled | ||
337 | * Reset the DRn bits in the virtualized register value. | ||
338 | * The ptrace trigger routine will add in whatever is needed. | ||
339 | */ | ||
340 | current->thread.debugreg6 &= ~DR_TRAP_BITS; | ||
341 | cpu = get_cpu(); | ||
342 | |||
343 | /* Handle all the breakpoints that were triggered */ | ||
344 | for (i = 0; i < HBP_NUM; ++i) { | ||
345 | if (likely(!(dr6 & (DR_TRAP0 << i)))) | ||
346 | continue; | ||
347 | /* | ||
348 | * Find the corresponding hw_breakpoint structure and | ||
349 | * invoke its triggered callback. | ||
350 | */ | ||
351 | if (i >= hbp_kernel_pos) | ||
352 | bp = per_cpu(this_hbp_kernel[i], cpu); | ||
353 | else { | ||
354 | bp = current->thread.hbp[i]; | ||
355 | if (bp) | ||
356 | rc = NOTIFY_DONE; | ||
357 | } | ||
358 | /* | ||
359 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | ||
360 | * exception handling | ||
361 | */ | ||
362 | (*dr6_p) &= ~(DR_TRAP0 << i); | ||
363 | /* | ||
364 | * bp can be NULL due to lazy debug register switching | ||
365 | * or due to the delay between updates of hbp_kernel_pos | ||
366 | * and this_hbp_kernel. | ||
367 | */ | ||
368 | if (!bp) | ||
369 | continue; | ||
370 | |||
371 | (bp->triggered)(bp, args->regs); | ||
372 | } | ||
373 | if (dr6 & (~DR_TRAP_BITS)) | ||
374 | rc = NOTIFY_DONE; | ||
375 | |||
376 | set_debugreg(dr7, 7); | ||
377 | put_cpu(); | ||
378 | return rc; | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Handle debug exception notifications. | ||
383 | */ | ||
384 | int __kprobes hw_breakpoint_exceptions_notify( | ||
385 | struct notifier_block *unused, unsigned long val, void *data) | ||
386 | { | ||
387 | if (val != DIE_DEBUG) | ||
388 | return NOTIFY_DONE; | ||
389 | |||
390 | return hw_breakpoint_handler(data); | ||
391 | } | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..34e86b67550c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | 45 | ||
46 | #include <asm/debugreg.h> | ||
46 | #include <asm/apicdef.h> | 47 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 48 | #include <asm/system.h> |
48 | 49 | ||
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
434 | "resuming...\n"); | 435 | "resuming...\n"); |
435 | kgdb_arch_handle_exception(args->trapnr, args->signr, | 436 | kgdb_arch_handle_exception(args->trapnr, args->signr, |
436 | args->err, "c", "", regs); | 437 | args->err, "c", "", regs); |
438 | /* | ||
439 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
440 | * denote completion of processing | ||
441 | */ | ||
442 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
437 | 443 | ||
438 | return NOTIFY_STOP; | 444 | return NOTIFY_STOP; |
439 | } | 445 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..b5b1848c5336 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <asm/pgtable.h> | 54 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 56 | #include <asm/alternative.h> |
57 | #include <asm/debugreg.h> | ||
57 | 58 | ||
58 | void jprobe_return_end(void); | 59 | void jprobe_return_end(void); |
59 | 60 | ||
@@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
967 | ret = NOTIFY_STOP; | 968 | ret = NOTIFY_STOP; |
968 | break; | 969 | break; |
969 | case DIE_DEBUG: | 970 | case DIE_DEBUG: |
970 | if (post_kprobe_handler(args->regs)) | 971 | if (post_kprobe_handler(args->regs)) { |
972 | /* | ||
973 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
974 | * denote completion of processing | ||
975 | */ | ||
976 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
971 | ret = NOTIFY_STOP; | 977 | ret = NOTIFY_STOP; |
978 | } | ||
972 | break; | 979 | break; |
973 | case DIE_GPF: | 980 | case DIE_GPF: |
974 | /* | 981 | /* |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..c843f8406da2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | static void set_idt(void *newidt, __u16 limit) | 30 | static void set_idt(void *newidt, __u16 limit) |
30 | { | 31 | { |
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) | |||
202 | 203 | ||
203 | /* Interrupts aren't acceptable while we reboot */ | 204 | /* Interrupts aren't acceptable while we reboot */ |
204 | local_irq_disable(); | 205 | local_irq_disable(); |
206 | hw_breakpoint_disable(); | ||
205 | 207 | ||
206 | if (image->preserve_context) { | 208 | if (image->preserve_context) { |
207 | #ifdef CONFIG_X86_IO_APIC | 209 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..4a8bb82248ae 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | 23 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
23 | unsigned long addr) | 24 | unsigned long addr) |
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) | |||
282 | 283 | ||
283 | /* Interrupts aren't acceptable while we reboot */ | 284 | /* Interrupts aren't acceptable while we reboot */ |
284 | local_irq_disable(); | 285 | local_irq_disable(); |
286 | hw_breakpoint_disable(); | ||
285 | 287 | ||
286 | if (image->preserve_context) { | 288 | if (image->preserve_context) { |
287 | #ifdef CONFIG_X86_IO_APIC | 289 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994dd6a4a2a0..fc6e4b773fc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -17,6 +17,8 @@ | |||
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 18 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | 19 | #include <asm/ds.h> |
20 | #include <asm/debugreg.h> | ||
21 | #include <asm/hw_breakpoint.h> | ||
20 | 22 | ||
21 | unsigned long idle_halt; | 23 | unsigned long idle_halt; |
22 | EXPORT_SYMBOL(idle_halt); | 24 | EXPORT_SYMBOL(idle_halt); |
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_struct *tsk) | |||
48 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | 50 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); |
49 | tsk->thread.xstate = NULL; | 51 | tsk->thread.xstate = NULL; |
50 | } | 52 | } |
53 | if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) | ||
54 | flush_thread_hw_breakpoint(tsk); | ||
51 | 55 | ||
52 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | 56 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); |
53 | } | 57 | } |
@@ -108,12 +112,8 @@ void flush_thread(void) | |||
108 | 112 | ||
109 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 113 | clear_tsk_thread_flag(tsk, TIF_DEBUG); |
110 | 114 | ||
111 | tsk->thread.debugreg0 = 0; | 115 | if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) |
112 | tsk->thread.debugreg1 = 0; | 116 | flush_thread_hw_breakpoint(tsk); |
113 | tsk->thread.debugreg2 = 0; | ||
114 | tsk->thread.debugreg3 = 0; | ||
115 | tsk->thread.debugreg6 = 0; | ||
116 | tsk->thread.debugreg7 = 0; | ||
117 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 117 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
118 | /* | 118 | /* |
119 | * Forget coprocessor state.. | 119 | * Forget coprocessor state.. |
@@ -195,16 +195,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
195 | else if (next->debugctlmsr != prev->debugctlmsr) | 195 | else if (next->debugctlmsr != prev->debugctlmsr) |
196 | update_debugctlmsr(next->debugctlmsr); | 196 | update_debugctlmsr(next->debugctlmsr); |
197 | 197 | ||
198 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
199 | set_debugreg(next->debugreg0, 0); | ||
200 | set_debugreg(next->debugreg1, 1); | ||
201 | set_debugreg(next->debugreg2, 2); | ||
202 | set_debugreg(next->debugreg3, 3); | ||
203 | /* no 4 and 5 */ | ||
204 | set_debugreg(next->debugreg6, 6); | ||
205 | set_debugreg(next->debugreg7, 7); | ||
206 | } | ||
207 | |||
208 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 198 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
209 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 199 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
210 | /* prev and next are different */ | 200 | /* prev and next are different */ |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524984af..00a8fe4c58bb 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -58,6 +58,8 @@ | |||
58 | #include <asm/idle.h> | 58 | #include <asm/idle.h> |
59 | #include <asm/syscalls.h> | 59 | #include <asm/syscalls.h> |
60 | #include <asm/ds.h> | 60 | #include <asm/ds.h> |
61 | #include <asm/debugreg.h> | ||
62 | #include <asm/hw_breakpoint.h> | ||
61 | 63 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 64 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 65 | ||
@@ -262,7 +264,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
262 | 264 | ||
263 | task_user_gs(p) = get_user_gs(regs); | 265 | task_user_gs(p) = get_user_gs(regs); |
264 | 266 | ||
267 | p->thread.io_bitmap_ptr = NULL; | ||
265 | tsk = current; | 268 | tsk = current; |
269 | err = -ENOMEM; | ||
270 | if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) | ||
271 | if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) | ||
272 | goto out; | ||
273 | |||
266 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 274 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
267 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | 275 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
268 | IO_BITMAP_BYTES, GFP_KERNEL); | 276 | IO_BITMAP_BYTES, GFP_KERNEL); |
@@ -282,10 +290,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
282 | err = do_set_thread_area(p, -1, | 290 | err = do_set_thread_area(p, -1, |
283 | (struct user_desc __user *)childregs->si, 0); | 291 | (struct user_desc __user *)childregs->si, 0); |
284 | 292 | ||
293 | out: | ||
285 | if (err && p->thread.io_bitmap_ptr) { | 294 | if (err && p->thread.io_bitmap_ptr) { |
286 | kfree(p->thread.io_bitmap_ptr); | 295 | kfree(p->thread.io_bitmap_ptr); |
287 | p->thread.io_bitmap_max = 0; | 296 | p->thread.io_bitmap_max = 0; |
288 | } | 297 | } |
298 | if (err) | ||
299 | flush_thread_hw_breakpoint(p); | ||
289 | 300 | ||
290 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | 301 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
291 | p->thread.ds_ctx = NULL; | 302 | p->thread.ds_ctx = NULL; |
@@ -424,6 +435,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
424 | lazy_load_gs(next->gs); | 435 | lazy_load_gs(next->gs); |
425 | 436 | ||
426 | percpu_write(current_task, next_p); | 437 | percpu_write(current_task, next_p); |
438 | /* | ||
439 | * There's a problem with moving the arch_install_thread_hw_breakpoint() | ||
440 | * call before current is updated. Suppose a kernel breakpoint is | ||
441 | * triggered in between the two, the hw-breakpoint handler will see that | ||
442 | * the 'current' task does not have TIF_DEBUG flag set and will think it | ||
443 | * is leftover from an old task (lazy switching) and will erase it. Then | ||
444 | * until the next context switch, no user-breakpoints will be installed. | ||
445 | * | ||
446 | * The real problem is that it's impossible to update both current and | ||
447 | * physical debug registers at the same instant, so there will always be | ||
448 | * a window in which they disagree and a breakpoint might get triggered. | ||
449 | * Since we use lazy switching, we are forced to assume that a | ||
450 | * disagreement means that current is correct and the exception is due | ||
451 | * to lazy debug register switching. | ||
452 | */ | ||
453 | if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) | ||
454 | arch_install_thread_hw_breakpoint(next_p); | ||
427 | 455 | ||
428 | return prev_p; | 456 | return prev_p; |
429 | } | 457 | } |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb5407b9d..89c46f1259d3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -52,6 +52,8 @@ | |||
52 | #include <asm/idle.h> | 52 | #include <asm/idle.h> |
53 | #include <asm/syscalls.h> | 53 | #include <asm/syscalls.h> |
54 | #include <asm/ds.h> | 54 | #include <asm/ds.h> |
55 | #include <asm/debugreg.h> | ||
56 | #include <asm/hw_breakpoint.h> | ||
55 | 57 | ||
56 | asmlinkage extern void ret_from_fork(void); | 58 | asmlinkage extern void ret_from_fork(void); |
57 | 59 | ||
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *dead_task) | |||
245 | BUG(); | 247 | BUG(); |
246 | } | 248 | } |
247 | } | 249 | } |
250 | if (unlikely(dead_task->thread.debugreg7)) | ||
251 | flush_thread_hw_breakpoint(dead_task); | ||
248 | } | 252 | } |
249 | 253 | ||
250 | static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) | 254 | static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) |
@@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
300 | 304 | ||
301 | p->thread.fs = me->thread.fs; | 305 | p->thread.fs = me->thread.fs; |
302 | p->thread.gs = me->thread.gs; | 306 | p->thread.gs = me->thread.gs; |
307 | p->thread.io_bitmap_ptr = NULL; | ||
303 | 308 | ||
304 | savesegment(gs, p->thread.gsindex); | 309 | savesegment(gs, p->thread.gsindex); |
305 | savesegment(fs, p->thread.fsindex); | 310 | savesegment(fs, p->thread.fsindex); |
306 | savesegment(es, p->thread.es); | 311 | savesegment(es, p->thread.es); |
307 | savesegment(ds, p->thread.ds); | 312 | savesegment(ds, p->thread.ds); |
308 | 313 | ||
314 | err = -ENOMEM; | ||
315 | if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) | ||
316 | if (copy_thread_hw_breakpoint(me, p, clone_flags)) | ||
317 | goto out; | ||
318 | |||
309 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 319 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
310 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 320 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
311 | if (!p->thread.io_bitmap_ptr) { | 321 | if (!p->thread.io_bitmap_ptr) { |
@@ -344,6 +354,9 @@ out: | |||
344 | kfree(p->thread.io_bitmap_ptr); | 354 | kfree(p->thread.io_bitmap_ptr); |
345 | p->thread.io_bitmap_max = 0; | 355 | p->thread.io_bitmap_max = 0; |
346 | } | 356 | } |
357 | if (err) | ||
358 | flush_thread_hw_breakpoint(p); | ||
359 | |||
347 | return err; | 360 | return err; |
348 | } | 361 | } |
349 | 362 | ||
@@ -489,6 +502,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
489 | */ | 502 | */ |
490 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) | 503 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) |
491 | math_state_restore(); | 504 | math_state_restore(); |
505 | /* | ||
506 | * There's a problem with moving the arch_install_thread_hw_breakpoint() | ||
507 | * call before current is updated. Suppose a kernel breakpoint is | ||
508 | * triggered in between the two, the hw-breakpoint handler will see that | ||
509 | * the 'current' task does not have TIF_DEBUG flag set and will think it | ||
510 | * is leftover from an old task (lazy switching) and will erase it. Then | ||
511 | * until the next context switch, no user-breakpoints will be installed. | ||
512 | * | ||
513 | * The real problem is that it's impossible to update both current and | ||
514 | * physical debug registers at the same instant, so there will always be | ||
515 | * a window in which they disagree and a breakpoint might get triggered. | ||
516 | * Since we use lazy switching, we are forced to assume that a | ||
517 | * disagreement means that current is correct and the exception is due | ||
518 | * to lazy debug register switching. | ||
519 | */ | ||
520 | if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) | ||
521 | arch_install_thread_hw_breakpoint(next_p); | ||
522 | |||
492 | return prev_p; | 523 | return prev_p; |
493 | } | 524 | } |
494 | 525 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..b457f78b7dbf 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <asm/prctl.h> | 34 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 35 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 36 | #include <asm/ds.h> |
37 | #include <asm/hw_breakpoint.h> | ||
37 | 38 | ||
38 | #include <trace/syscall.h> | 39 | #include <trace/syscall.h> |
39 | 40 | ||
@@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task, | |||
136 | return 0; | 137 | return 0; |
137 | } | 138 | } |
138 | 139 | ||
139 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
140 | { | ||
141 | return TASK_SIZE - 3; | ||
142 | } | ||
143 | |||
144 | #else /* CONFIG_X86_64 */ | 140 | #else /* CONFIG_X86_64 */ |
145 | 141 | ||
146 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 142 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task, | |||
265 | return 0; | 261 | return 0; |
266 | } | 262 | } |
267 | 263 | ||
268 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
269 | { | ||
270 | #ifdef CONFIG_IA32_EMULATION | ||
271 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
272 | return IA32_PAGE_OFFSET - 3; | ||
273 | #endif | ||
274 | return TASK_SIZE_MAX - 7; | ||
275 | } | ||
276 | |||
277 | #endif /* CONFIG_X86_32 */ | 264 | #endif /* CONFIG_X86_32 */ |
278 | 265 | ||
279 | static unsigned long get_flags(struct task_struct *task) | 266 | static unsigned long get_flags(struct task_struct *task) |
@@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target, | |||
464 | } | 451 | } |
465 | 452 | ||
466 | /* | 453 | /* |
467 | * This function is trivial and will be inlined by the compiler. | 454 | * Decode the length and type bits for a particular breakpoint as |
468 | * Having it separates the implementation details of debug | 455 | * stored in debug register 7. Return the "enabled" status. |
469 | * registers from the interface details of ptrace. | ||
470 | */ | 456 | */ |
471 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 457 | static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, |
458 | unsigned *type) | ||
472 | { | 459 | { |
473 | switch (n) { | 460 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); |
474 | case 0: return child->thread.debugreg0; | 461 | |
475 | case 1: return child->thread.debugreg1; | 462 | *len = (bp_info & 0xc) | 0x40; |
476 | case 2: return child->thread.debugreg2; | 463 | *type = (bp_info & 0x3) | 0x80; |
477 | case 3: return child->thread.debugreg3; | 464 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; |
478 | case 6: return child->thread.debugreg6; | ||
479 | case 7: return child->thread.debugreg7; | ||
480 | } | ||
481 | return 0; | ||
482 | } | 465 | } |
483 | 466 | ||
484 | static int ptrace_set_debugreg(struct task_struct *child, | 467 | static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) |
485 | int n, unsigned long data) | ||
486 | { | 468 | { |
469 | struct thread_struct *thread = &(current->thread); | ||
487 | int i; | 470 | int i; |
488 | 471 | ||
489 | if (unlikely(n == 4 || n == 5)) | 472 | /* |
490 | return -EIO; | 473 | * Store in the virtual DR6 register the fact that the breakpoint |
474 | * was hit so the thread's debugger will see it. | ||
475 | */ | ||
476 | for (i = 0; i < hbp_kernel_pos; i++) | ||
477 | /* | ||
478 | * We will check bp->info.address against the address stored in | ||
479 | * thread's hbp structure and not debugreg[i]. This is to ensure | ||
480 | * that the corresponding bit for 'i' in DR7 register is enabled | ||
481 | */ | ||
482 | if (bp->info.address == thread->hbp[i]->info.address) | ||
483 | break; | ||
491 | 484 | ||
492 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 485 | thread->debugreg6 |= (DR_TRAP0 << i); |
493 | return -EIO; | 486 | } |
494 | 487 | ||
495 | switch (n) { | 488 | /* |
496 | case 0: child->thread.debugreg0 = data; break; | 489 | * Handle ptrace writes to debug register 7. |
497 | case 1: child->thread.debugreg1 = data; break; | 490 | */ |
498 | case 2: child->thread.debugreg2 = data; break; | 491 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) |
499 | case 3: child->thread.debugreg3 = data; break; | 492 | { |
493 | struct thread_struct *thread = &(tsk->thread); | ||
494 | unsigned long old_dr7 = thread->debugreg7; | ||
495 | int i, orig_ret = 0, rc = 0; | ||
496 | int enabled, second_pass = 0; | ||
497 | unsigned len, type; | ||
498 | struct hw_breakpoint *bp; | ||
499 | |||
500 | data &= ~DR_CONTROL_RESERVED; | ||
501 | restore: | ||
502 | /* | ||
503 | * Loop through all the hardware breakpoints, making the | ||
504 | * appropriate changes to each. | ||
505 | */ | ||
506 | for (i = 0; i < HBP_NUM; i++) { | ||
507 | enabled = decode_dr7(data, i, &len, &type); | ||
508 | bp = thread->hbp[i]; | ||
509 | |||
510 | if (!enabled) { | ||
511 | if (bp) { | ||
512 | /* Don't unregister the breakpoints right away, | ||
513 | * unless all register_user_hw_breakpoint() | ||
514 | * requests have succeeded. This prevents | ||
515 | * any window of opportunity for debug | ||
516 | * register grabbing by other users. | ||
517 | */ | ||
518 | if (!second_pass) | ||
519 | continue; | ||
520 | unregister_user_hw_breakpoint(tsk, bp); | ||
521 | kfree(bp); | ||
522 | } | ||
523 | continue; | ||
524 | } | ||
525 | if (!bp) { | ||
526 | rc = -ENOMEM; | ||
527 | bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); | ||
528 | if (bp) { | ||
529 | bp->info.address = thread->debugreg[i]; | ||
530 | bp->triggered = ptrace_triggered; | ||
531 | bp->info.len = len; | ||
532 | bp->info.type = type; | ||
533 | rc = register_user_hw_breakpoint(tsk, bp); | ||
534 | if (rc) | ||
535 | kfree(bp); | ||
536 | } | ||
537 | } else | ||
538 | rc = modify_user_hw_breakpoint(tsk, bp); | ||
539 | if (rc) | ||
540 | break; | ||
541 | } | ||
542 | /* | ||
543 | * Make a second pass to free the remaining unused breakpoints | ||
544 | * or to restore the original breakpoints if an error occurred. | ||
545 | */ | ||
546 | if (!second_pass) { | ||
547 | second_pass = 1; | ||
548 | if (rc < 0) { | ||
549 | orig_ret = rc; | ||
550 | data = old_dr7; | ||
551 | } | ||
552 | goto restore; | ||
553 | } | ||
554 | return ((orig_ret < 0) ? orig_ret : rc); | ||
555 | } | ||
500 | 556 | ||
501 | case 6: | 557 | /* |
502 | if ((data & ~0xffffffffUL) != 0) | 558 | * Handle PTRACE_PEEKUSR calls for the debug register area. |
503 | return -EIO; | 559 | */ |
504 | child->thread.debugreg6 = data; | 560 | unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) |
505 | break; | 561 | { |
562 | struct thread_struct *thread = &(tsk->thread); | ||
563 | unsigned long val = 0; | ||
564 | |||
565 | if (n < HBP_NUM) | ||
566 | val = thread->debugreg[n]; | ||
567 | else if (n == 6) | ||
568 | val = thread->debugreg6; | ||
569 | else if (n == 7) | ||
570 | val = thread->debugreg7; | ||
571 | return val; | ||
572 | } | ||
506 | 573 | ||
507 | case 7: | 574 | /* |
508 | /* | 575 | * Handle PTRACE_POKEUSR calls for the debug register area. |
509 | * Sanity-check data. Take one half-byte at once with | 576 | */ |
510 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 577 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) |
511 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | 578 | { |
512 | * 2 and 3 are LENi. Given a list of invalid values, | 579 | struct thread_struct *thread = &(tsk->thread); |
513 | * we do mask |= 1 << invalid_value, so that | 580 | int rc = 0; |
514 | * (mask >> check) & 1 is a correct test for invalid | 581 | |
515 | * values. | 582 | /* There are no DR4 or DR5 registers */ |
516 | * | 583 | if (n == 4 || n == 5) |
517 | * R/Wi contains the type of the breakpoint / | 584 | return -EIO; |
518 | * watchpoint, LENi contains the length of the watched | 585 | |
519 | * data in the watchpoint case. | 586 | if (n == 6) { |
520 | * | 587 | tsk->thread.debugreg6 = val; |
521 | * The invalid values are: | 588 | goto ret_path; |
522 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
523 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
524 | * mask |= 0x4444. | ||
525 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
526 | * 0x1110. | ||
527 | * | ||
528 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
529 | * | ||
530 | * See the Intel Manual "System Programming Guide", | ||
531 | * 15.2.4 | ||
532 | * | ||
533 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
534 | * mode (i.e. even for 32-bit userspace software, but | ||
535 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
536 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
537 | */ | ||
538 | #ifdef CONFIG_X86_32 | ||
539 | #define DR7_MASK 0x5f54 | ||
540 | #else | ||
541 | #define DR7_MASK 0x5554 | ||
542 | #endif | ||
543 | data &= ~DR_CONTROL_RESERVED; | ||
544 | for (i = 0; i < 4; i++) | ||
545 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | ||
546 | return -EIO; | ||
547 | child->thread.debugreg7 = data; | ||
548 | if (data) | ||
549 | set_tsk_thread_flag(child, TIF_DEBUG); | ||
550 | else | ||
551 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
552 | break; | ||
553 | } | 589 | } |
590 | if (n < HBP_NUM) { | ||
591 | if (thread->hbp[n]) { | ||
592 | if (arch_check_va_in_userspace(val, | ||
593 | thread->hbp[n]->info.len) == 0) { | ||
594 | rc = -EIO; | ||
595 | goto ret_path; | ||
596 | } | ||
597 | thread->hbp[n]->info.address = val; | ||
598 | } | ||
599 | thread->debugreg[n] = val; | ||
600 | } | ||
601 | /* All that's left is DR7 */ | ||
602 | if (n == 7) | ||
603 | rc = ptrace_write_dr7(tsk, val); | ||
554 | 604 | ||
555 | return 0; | 605 | ret_path: |
606 | return rc; | ||
556 | } | 607 | } |
557 | 608 | ||
558 | /* | 609 | /* |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94e..0f89a4f20db2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) | |||
799 | 799 | ||
800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
801 | if (signr > 0) { | 801 | if (signr > 0) { |
802 | /* | ||
803 | * Re-enable any watchpoints before delivering the | ||
804 | * signal to user space. The processor register will | ||
805 | * have been cleared if the watchpoint triggered | ||
806 | * inside the kernel. | ||
807 | */ | ||
808 | if (current->thread.debugreg7) | ||
809 | set_debugreg(current->thread.debugreg7, 7); | ||
810 | |||
811 | /* Whee! Actually deliver the signal. */ | 802 | /* Whee! Actually deliver the signal. */ |
812 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 803 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
813 | /* | 804 | /* |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda69ee64..dee0f3d814af 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include <asm/apic.h> | 63 | #include <asm/apic.h> |
64 | #include <asm/setup.h> | 64 | #include <asm/setup.h> |
65 | #include <asm/uv/uv.h> | 65 | #include <asm/uv/uv.h> |
66 | #include <asm/debugreg.h> | ||
66 | #include <linux/mc146818rtc.h> | 67 | #include <linux/mc146818rtc.h> |
67 | 68 | ||
68 | #include <asm/smpboot_hooks.h> | 69 | #include <asm/smpboot_hooks.h> |
@@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
326 | setup_secondary_clock(); | 327 | setup_secondary_clock(); |
327 | 328 | ||
328 | wmb(); | 329 | wmb(); |
330 | load_debug_registers(); | ||
329 | cpu_idle(); | 331 | cpu_idle(); |
330 | } | 332 | } |
331 | 333 | ||
@@ -1254,6 +1256,7 @@ void cpu_disable_common(void) | |||
1254 | remove_cpu_from_maps(cpu); | 1256 | remove_cpu_from_maps(cpu); |
1255 | unlock_vector_lock(); | 1257 | unlock_vector_lock(); |
1256 | fixup_irqs(); | 1258 | fixup_irqs(); |
1259 | hw_breakpoint_disable(); | ||
1257 | } | 1260 | } |
1258 | 1261 | ||
1259 | int native_cpu_disable(void) | 1262 | int native_cpu_disable(void) |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5f935f0d5861..286d64eba31b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -530,77 +530,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
530 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | 530 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
531 | { | 531 | { |
532 | struct task_struct *tsk = current; | 532 | struct task_struct *tsk = current; |
533 | unsigned long condition; | 533 | unsigned long dr6; |
534 | int si_code; | 534 | int si_code; |
535 | 535 | ||
536 | get_debugreg(condition, 6); | 536 | get_debugreg(dr6, 6); |
537 | 537 | ||
538 | /* Catch kmemcheck conditions first of all! */ | 538 | /* Catch kmemcheck conditions first of all! */ |
539 | if (condition & DR_STEP && kmemcheck_trap(regs)) | 539 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
540 | return; | 540 | return; |
541 | 541 | ||
542 | /* DR6 may or may not be cleared by the CPU */ | ||
543 | set_debugreg(0, 6); | ||
542 | /* | 544 | /* |
543 | * The processor cleared BTF, so don't mark that we need it set. | 545 | * The processor cleared BTF, so don't mark that we need it set. |
544 | */ | 546 | */ |
545 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 547 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
546 | tsk->thread.debugctlmsr = 0; | 548 | tsk->thread.debugctlmsr = 0; |
547 | 549 | ||
548 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 550 | /* Store the virtualized DR6 value */ |
549 | SIGTRAP) == NOTIFY_STOP) | 551 | tsk->thread.debugreg6 = dr6; |
552 | |||
553 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | ||
554 | SIGTRAP) == NOTIFY_STOP) | ||
550 | return; | 555 | return; |
551 | 556 | ||
552 | /* It's safe to allow irq's after DR6 has been saved */ | 557 | /* It's safe to allow irq's after DR6 has been saved */ |
553 | preempt_conditional_sti(regs); | 558 | preempt_conditional_sti(regs); |
554 | 559 | ||
555 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 560 | if (regs->flags & X86_VM_MASK) { |
556 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 561 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
557 | if (!tsk->thread.debugreg7) | 562 | error_code, 1); |
558 | goto clear_dr7; | 563 | return; |
559 | } | 564 | } |
560 | 565 | ||
561 | #ifdef CONFIG_X86_32 | ||
562 | if (regs->flags & X86_VM_MASK) | ||
563 | goto debug_vm86; | ||
564 | #endif | ||
565 | |||
566 | /* Save debug status register where ptrace can see it */ | ||
567 | tsk->thread.debugreg6 = condition; | ||
568 | |||
569 | /* | 566 | /* |
570 | * Single-stepping through TF: make sure we ignore any events in | 567 | * Single-stepping through system calls: ignore any exceptions in |
571 | * kernel space (but re-enable TF when returning to user mode). | 568 | * kernel space, but re-enable TF when returning to user mode. |
569 | * | ||
570 | * We already checked v86 mode above, so we can check for kernel mode | ||
571 | * by just checking the CPL of CS. | ||
572 | */ | 572 | */ |
573 | if (condition & DR_STEP) { | 573 | if ((dr6 & DR_STEP) && !user_mode(regs)) { |
574 | if (!user_mode(regs)) | 574 | tsk->thread.debugreg6 &= ~DR_STEP; |
575 | goto clear_TF_reenable; | 575 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
576 | regs->flags &= ~X86_EFLAGS_TF; | ||
576 | } | 577 | } |
577 | 578 | si_code = get_si_code(tsk->thread.debugreg6); | |
578 | si_code = get_si_code(condition); | 579 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) |
579 | /* Ok, finally something we can handle */ | 580 | send_sigtrap(tsk, regs, error_code, si_code); |
580 | send_sigtrap(tsk, regs, error_code, si_code); | ||
581 | |||
582 | /* | ||
583 | * Disable additional traps. They'll be re-enabled when | ||
584 | * the signal is delivered. | ||
585 | */ | ||
586 | clear_dr7: | ||
587 | set_debugreg(0, 7); | ||
588 | preempt_conditional_cli(regs); | 581 | preempt_conditional_cli(regs); |
589 | return; | ||
590 | 582 | ||
591 | #ifdef CONFIG_X86_32 | ||
592 | debug_vm86: | ||
593 | /* reenable preemption: handle_vm86_trap() might sleep */ | ||
594 | dec_preempt_count(); | ||
595 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
596 | conditional_cli(regs); | ||
597 | return; | ||
598 | #endif | ||
599 | |||
600 | clear_TF_reenable: | ||
601 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
602 | regs->flags &= ~X86_EFLAGS_TF; | ||
603 | preempt_conditional_cli(regs); | ||
604 | return; | 583 | return; |
605 | } | 584 | } |
606 | 585 | ||
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..11a4ad4d6253 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | |||
540 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
541 | 541 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | 543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) { |
544 | /* | ||
545 | * Reset the BS bit in dr6 (pointed to by arg->err) to | ||
546 | * denote completion of processing | ||
547 | */ | ||
548 | (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; | ||
544 | return NOTIFY_STOP; | 549 | return NOTIFY_STOP; |
550 | } | ||
545 | 551 | ||
546 | return NOTIFY_DONE; | 552 | return NOTIFY_DONE; |
547 | } | 553 | } |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d277ef1eea51..394cbb88987c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mce.h> | 18 | #include <asm/mce.h> |
19 | #include <asm/xcr.h> | 19 | #include <asm/xcr.h> |
20 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
21 | #include <asm/debugreg.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_X86_32 | 23 | #ifdef CONFIG_X86_32 |
23 | static struct saved_context saved_context; | 24 | static struct saved_context saved_context; |
@@ -104,6 +105,7 @@ static void __save_processor_state(struct saved_context *ctxt) | |||
104 | ctxt->cr4 = read_cr4(); | 105 | ctxt->cr4 = read_cr4(); |
105 | ctxt->cr8 = read_cr8(); | 106 | ctxt->cr8 = read_cr8(); |
106 | #endif | 107 | #endif |
108 | hw_breakpoint_disable(); | ||
107 | } | 109 | } |
108 | 110 | ||
109 | /* Needed by apm.c */ | 111 | /* Needed by apm.c */ |
@@ -146,27 +148,7 @@ static void fix_processor_context(void) | |||
146 | /* | 148 | /* |
147 | * Now maybe reload the debug registers | 149 | * Now maybe reload the debug registers |
148 | */ | 150 | */ |
149 | if (current->thread.debugreg7) { | 151 | load_debug_registers(); |
150 | #ifdef CONFIG_X86_32 | ||
151 | set_debugreg(current->thread.debugreg0, 0); | ||
152 | set_debugreg(current->thread.debugreg1, 1); | ||
153 | set_debugreg(current->thread.debugreg2, 2); | ||
154 | set_debugreg(current->thread.debugreg3, 3); | ||
155 | /* no 4 and 5 */ | ||
156 | set_debugreg(current->thread.debugreg6, 6); | ||
157 | set_debugreg(current->thread.debugreg7, 7); | ||
158 | #else | ||
159 | /* CONFIG_X86_64 */ | ||
160 | loaddebug(¤t->thread, 0); | ||
161 | loaddebug(¤t->thread, 1); | ||
162 | loaddebug(¤t->thread, 2); | ||
163 | loaddebug(¤t->thread, 3); | ||
164 | /* no 4 and 5 */ | ||
165 | loaddebug(¤t->thread, 6); | ||
166 | loaddebug(¤t->thread, 7); | ||
167 | #endif | ||
168 | } | ||
169 | |||
170 | } | 152 | } |
171 | 153 | ||
172 | /** | 154 | /** |
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h new file mode 100644 index 000000000000..9bf2d12eb74a --- /dev/null +++ b/include/asm-generic/hw_breakpoint.h | |||
@@ -0,0 +1,139 @@ | |||
1 | #ifndef _ASM_GENERIC_HW_BREAKPOINT_H | ||
2 | #define _ASM_GENERIC_HW_BREAKPOINT_H | ||
3 | |||
4 | #ifndef __ARCH_HW_BREAKPOINT_H | ||
5 | #error "Please don't include this file directly" | ||
6 | #endif | ||
7 | |||
8 | #ifdef __KERNEL__ | ||
9 | #include <linux/list.h> | ||
10 | #include <linux/types.h> | ||
11 | #include <linux/kallsyms.h> | ||
12 | |||
13 | /** | ||
14 | * struct hw_breakpoint - unified kernel/user-space hardware breakpoint | ||
15 | * @triggered: callback invoked after target address access | ||
16 | * @info: arch-specific breakpoint info (address, length, and type) | ||
17 | * | ||
18 | * %hw_breakpoint structures are the kernel's way of representing | ||
19 | * hardware breakpoints. These are data breakpoints | ||
20 | * (also known as "watchpoints", triggered on data access), and the breakpoint's | ||
21 | * target address can be located in either kernel space or user space. | ||
22 | * | ||
23 | * The breakpoint's address, length, and type are highly | ||
24 | * architecture-specific. The values are encoded in the @info field; you | ||
25 | * specify them when registering the breakpoint. To examine the encoded | ||
26 | * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared | ||
27 | * below. | ||
28 | * | ||
29 | * The address is specified as a regular kernel pointer (for kernel-space | ||
30 | * breakpoints) or as an %__user pointer (for user-space breakpoints). | ||
31 | * With register_user_hw_breakpoint(), the address must refer to a | ||
32 | * location in user space. The breakpoint will be active only while the | ||
33 | * requested task is running. Conversely with | ||
34 | * register_kernel_hw_breakpoint(), the address must refer to a location | ||
35 | * in kernel space, and the breakpoint will be active on all CPUs | ||
36 | * regardless of the current task. | ||
37 | * | ||
38 | * The length is the breakpoint's extent in bytes, which is subject to | ||
39 | * certain limitations. include/asm/hw_breakpoint.h contains macros | ||
40 | * defining the available lengths for a specific architecture. Note that | ||
41 | * the address's alignment must match the length. The breakpoint will | ||
42 | * catch accesses to any byte in the range from address to address + | ||
43 | * (length - 1). | ||
44 | * | ||
45 | * The breakpoint's type indicates the sort of access that will cause it | ||
46 | * to trigger. Possible values may include: | ||
47 | * | ||
48 | * %HW_BREAKPOINT_RW (triggered on read or write access), | ||
49 | * %HW_BREAKPOINT_WRITE (triggered on write access), and | ||
50 | * %HW_BREAKPOINT_READ (triggered on read access). | ||
51 | * | ||
52 | * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all | ||
53 | * possibilities are available on all architectures. Execute breakpoints | ||
54 | * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. | ||
55 | * | ||
56 | * When a breakpoint gets hit, the @triggered callback is | ||
57 | * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the | ||
58 | * processor registers. | ||
59 | * Data breakpoints occur after the memory access has taken place. | ||
60 | * Breakpoints are disabled during execution of @triggered, to avoid | ||
61 | * recursive traps and allow unhindered access to breakpointed memory. | ||
62 | * | ||
63 | * This sample code sets a breakpoint on pid_max and registers a callback | ||
64 | * function for writes to that variable. Note that it is not portable | ||
65 | * as written, because not all architectures support HW_BREAKPOINT_LEN_4. | ||
66 | * | ||
67 | * ---------------------------------------------------------------------- | ||
68 | * | ||
69 | * #include <asm/hw_breakpoint.h> | ||
70 | * | ||
71 | * struct hw_breakpoint my_bp; | ||
72 | * | ||
73 | * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) | ||
74 | * { | ||
75 | * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); | ||
76 | * dump_stack(); | ||
77 | * .......<more debugging output>........ | ||
78 | * } | ||
79 | * | ||
80 | * static struct hw_breakpoint my_bp; | ||
81 | * | ||
82 | * static int init_module(void) | ||
83 | * { | ||
84 | * ..........<do anything>............ | ||
85 | * my_bp.info.type = HW_BREAKPOINT_WRITE; | ||
86 | * my_bp.info.len = HW_BREAKPOINT_LEN_4; | ||
87 | * | ||
88 | * my_bp.triggered = my_triggered; | ||
89 | * | ||
90 | * rc = register_kernel_hw_breakpoint(&my_bp); | ||
91 | * ..........<do anything>............ | ||
92 | * } | ||
93 | * | ||
94 | * static void cleanup_module(void) | ||
95 | * { | ||
96 | * ..........<do anything>............ | ||
97 | * unregister_kernel_hw_breakpoint(&my_bp); | ||
98 | * ..........<do anything>............ | ||
99 | * } | ||
100 | * | ||
101 | * ---------------------------------------------------------------------- | ||
102 | */ | ||
103 | struct hw_breakpoint { | ||
104 | void (*triggered)(struct hw_breakpoint *, struct pt_regs *); | ||
105 | struct arch_hw_breakpoint info; | ||
106 | }; | ||
107 | |||
108 | /* | ||
109 | * len and type values are defined in include/asm/hw_breakpoint.h. | ||
110 | * Available values vary according to the architecture. On i386 the | ||
111 | * possibilities are: | ||
112 | * | ||
113 | * HW_BREAKPOINT_LEN_1 | ||
114 | * HW_BREAKPOINT_LEN_2 | ||
115 | * HW_BREAKPOINT_LEN_4 | ||
116 | * HW_BREAKPOINT_RW | ||
117 | * HW_BREAKPOINT_READ | ||
118 | * | ||
119 | * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the | ||
120 | * 1-, 2-, and 4-byte lengths may be unavailable. There also may be | ||
121 | * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. | ||
122 | */ | ||
123 | |||
124 | extern int register_user_hw_breakpoint(struct task_struct *tsk, | ||
125 | struct hw_breakpoint *bp); | ||
126 | extern int modify_user_hw_breakpoint(struct task_struct *tsk, | ||
127 | struct hw_breakpoint *bp); | ||
128 | extern void unregister_user_hw_breakpoint(struct task_struct *tsk, | ||
129 | struct hw_breakpoint *bp); | ||
130 | /* | ||
131 | * Kernel breakpoints are not associated with any particular thread. | ||
132 | */ | ||
133 | extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); | ||
134 | extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); | ||
135 | |||
136 | extern unsigned int hbp_kernel_pos; | ||
137 | |||
138 | #endif /* __KERNEL__ */ | ||
139 | #endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 9df4501cb921..f88decb1b445 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -97,6 +97,7 @@ obj-$(CONFIG_TRACING) += trace/ | |||
97 | obj-$(CONFIG_X86_DS) += trace/ | 97 | obj-$(CONFIG_X86_DS) += trace/ |
98 | obj-$(CONFIG_SMP) += sched_cpupri.o | 98 | obj-$(CONFIG_SMP) += sched_cpupri.o |
99 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 99 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
100 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | ||
100 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o | 101 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o |
101 | 102 | ||
102 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 103 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..c1f64e65a9f3 --- /dev/null +++ b/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,378 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) IBM Corporation, 2009 | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
22 | * using the CPU's debug registers. | ||
23 | * This file contains the arch-independent routines. | ||
24 | */ | ||
25 | |||
26 | #include <linux/irqflags.h> | ||
27 | #include <linux/kallsyms.h> | ||
28 | #include <linux/notifier.h> | ||
29 | #include <linux/kprobes.h> | ||
30 | #include <linux/kdebug.h> | ||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/smp.h> | ||
37 | |||
38 | #include <asm/hw_breakpoint.h> | ||
39 | #include <asm/processor.h> | ||
40 | |||
41 | #ifdef CONFIG_X86 | ||
42 | #include <asm/debugreg.h> | ||
43 | #endif | ||
44 | /* | ||
45 | * Spinlock that protects all (un)register operations over kernel/user-space | ||
46 | * breakpoint requests | ||
47 | */ | ||
48 | static DEFINE_SPINLOCK(hw_breakpoint_lock); | ||
49 | |||
50 | /* Array of kernel-space breakpoint structures */ | ||
51 | struct hw_breakpoint *hbp_kernel[HBP_NUM]; | ||
52 | |||
53 | /* | ||
54 | * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being | ||
55 | * modified but we need the older copy to handle any hbp exceptions. It will | ||
56 | * sync with hbp_kernel[] value after updation is done through IPIs. | ||
57 | */ | ||
58 | DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); | ||
59 | |||
60 | /* | ||
61 | * Kernel breakpoints grow downwards, starting from HBP_NUM | ||
62 | * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for | ||
63 | * kernel-space request. We will initialise it here and not in an __init | ||
64 | * routine because load_debug_registers(), which uses this variable can be | ||
65 | * called very early during CPU initialisation. | ||
66 | */ | ||
67 | unsigned int hbp_kernel_pos = HBP_NUM; | ||
68 | |||
69 | /* | ||
70 | * An array containing refcount of threads using a given bkpt register | ||
71 | * Accesses are synchronised by acquiring hw_breakpoint_lock | ||
72 | */ | ||
73 | unsigned int hbp_user_refcount[HBP_NUM]; | ||
74 | |||
75 | /* | ||
76 | * Load the debug registers during startup of a CPU. | ||
77 | */ | ||
78 | void load_debug_registers(void) | ||
79 | { | ||
80 | unsigned long flags; | ||
81 | struct task_struct *tsk = current; | ||
82 | |||
83 | spin_lock_bh(&hw_breakpoint_lock); | ||
84 | |||
85 | /* Prevent IPIs for new kernel breakpoint updates */ | ||
86 | local_irq_save(flags); | ||
87 | arch_update_kernel_hw_breakpoint(NULL); | ||
88 | local_irq_restore(flags); | ||
89 | |||
90 | if (test_tsk_thread_flag(tsk, TIF_DEBUG)) | ||
91 | arch_install_thread_hw_breakpoint(tsk); | ||
92 | |||
93 | spin_unlock_bh(&hw_breakpoint_lock); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Erase all the hardware breakpoint info associated with a thread. | ||
98 | * | ||
99 | * If tsk != current then tsk must not be usable (for example, a | ||
100 | * child being cleaned up from a failed fork). | ||
101 | */ | ||
102 | void flush_thread_hw_breakpoint(struct task_struct *tsk) | ||
103 | { | ||
104 | int i; | ||
105 | struct thread_struct *thread = &(tsk->thread); | ||
106 | |||
107 | spin_lock_bh(&hw_breakpoint_lock); | ||
108 | |||
109 | /* The thread no longer has any breakpoints associated with it */ | ||
110 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | ||
111 | for (i = 0; i < HBP_NUM; i++) { | ||
112 | if (thread->hbp[i]) { | ||
113 | hbp_user_refcount[i]--; | ||
114 | kfree(thread->hbp[i]); | ||
115 | thread->hbp[i] = NULL; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | arch_flush_thread_hw_breakpoint(tsk); | ||
120 | |||
121 | /* Actually uninstall the breakpoints if necessary */ | ||
122 | if (tsk == current) | ||
123 | arch_uninstall_thread_hw_breakpoint(); | ||
124 | spin_unlock_bh(&hw_breakpoint_lock); | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Copy the hardware breakpoint info from a thread to its cloned child. | ||
129 | */ | ||
130 | int copy_thread_hw_breakpoint(struct task_struct *tsk, | ||
131 | struct task_struct *child, unsigned long clone_flags) | ||
132 | { | ||
133 | /* | ||
134 | * We will assume that breakpoint settings are not inherited | ||
135 | * and the child starts out with no debug registers set. | ||
136 | * But what about CLONE_PTRACE? | ||
137 | */ | ||
138 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
139 | |||
140 | /* We will call flush routine since the debugregs are not inherited */ | ||
141 | arch_flush_thread_hw_breakpoint(child); | ||
142 | |||
143 | return 0; | ||
144 | } | ||
145 | |||
146 | static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, | ||
147 | struct hw_breakpoint *bp) | ||
148 | { | ||
149 | struct thread_struct *thread = &(tsk->thread); | ||
150 | int rc; | ||
151 | |||
152 | /* Do not overcommit. Fail if kernel has used the hbp registers */ | ||
153 | if (pos >= hbp_kernel_pos) | ||
154 | return -ENOSPC; | ||
155 | |||
156 | rc = arch_validate_hwbkpt_settings(bp, tsk); | ||
157 | if (rc) | ||
158 | return rc; | ||
159 | |||
160 | thread->hbp[pos] = bp; | ||
161 | hbp_user_refcount[pos]++; | ||
162 | |||
163 | arch_update_user_hw_breakpoint(pos, tsk); | ||
164 | /* | ||
165 | * Does it need to be installed right now? | ||
166 | * Otherwise it will get installed the next time tsk runs | ||
167 | */ | ||
168 | if (tsk == current) | ||
169 | arch_install_thread_hw_breakpoint(tsk); | ||
170 | |||
171 | return rc; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Modify the address of a hbp register already in use by the task | ||
176 | * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() | ||
177 | */ | ||
178 | static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, | ||
179 | struct hw_breakpoint *bp) | ||
180 | { | ||
181 | struct thread_struct *thread = &(tsk->thread); | ||
182 | |||
183 | if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) | ||
184 | return -EINVAL; | ||
185 | |||
186 | if (thread->hbp[pos] == NULL) | ||
187 | return -EINVAL; | ||
188 | |||
189 | thread->hbp[pos] = bp; | ||
190 | /* | ||
191 | * 'pos' must be that of a hbp register already used by 'tsk' | ||
192 | * Otherwise arch_modify_user_hw_breakpoint() will fail | ||
193 | */ | ||
194 | arch_update_user_hw_breakpoint(pos, tsk); | ||
195 | |||
196 | if (tsk == current) | ||
197 | arch_install_thread_hw_breakpoint(tsk); | ||
198 | |||
199 | return 0; | ||
200 | } | ||
201 | |||
202 | static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) | ||
203 | { | ||
204 | hbp_user_refcount[pos]--; | ||
205 | tsk->thread.hbp[pos] = NULL; | ||
206 | |||
207 | arch_update_user_hw_breakpoint(pos, tsk); | ||
208 | |||
209 | if (tsk == current) | ||
210 | arch_install_thread_hw_breakpoint(tsk); | ||
211 | } | ||
212 | |||
213 | /** | ||
214 | * register_user_hw_breakpoint - register a hardware breakpoint for user space | ||
215 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
216 | * @bp: the breakpoint structure to register | ||
217 | * | ||
218 | * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and | ||
219 | * @bp->triggered must be set properly before invocation | ||
220 | * | ||
221 | */ | ||
222 | int register_user_hw_breakpoint(struct task_struct *tsk, | ||
223 | struct hw_breakpoint *bp) | ||
224 | { | ||
225 | struct thread_struct *thread = &(tsk->thread); | ||
226 | int i, rc = -ENOSPC; | ||
227 | |||
228 | spin_lock_bh(&hw_breakpoint_lock); | ||
229 | |||
230 | for (i = 0; i < hbp_kernel_pos; i++) { | ||
231 | if (!thread->hbp[i]) { | ||
232 | rc = __register_user_hw_breakpoint(i, tsk, bp); | ||
233 | break; | ||
234 | } | ||
235 | } | ||
236 | if (!rc) | ||
237 | set_tsk_thread_flag(tsk, TIF_DEBUG); | ||
238 | |||
239 | spin_unlock_bh(&hw_breakpoint_lock); | ||
240 | return rc; | ||
241 | } | ||
242 | EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | ||
243 | |||
244 | /** | ||
245 | * modify_user_hw_breakpoint - modify a user-space hardware breakpoint | ||
246 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
247 | * @bp: the breakpoint structure to unregister | ||
248 | * | ||
249 | */ | ||
250 | int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) | ||
251 | { | ||
252 | struct thread_struct *thread = &(tsk->thread); | ||
253 | int i, ret = -ENOENT; | ||
254 | |||
255 | spin_lock_bh(&hw_breakpoint_lock); | ||
256 | for (i = 0; i < hbp_kernel_pos; i++) { | ||
257 | if (bp == thread->hbp[i]) { | ||
258 | ret = __modify_user_hw_breakpoint(i, tsk, bp); | ||
259 | break; | ||
260 | } | ||
261 | } | ||
262 | spin_unlock_bh(&hw_breakpoint_lock); | ||
263 | return ret; | ||
264 | } | ||
265 | EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); | ||
266 | |||
267 | /** | ||
268 | * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint | ||
269 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | ||
270 | * @bp: the breakpoint structure to unregister | ||
271 | * | ||
272 | */ | ||
273 | void unregister_user_hw_breakpoint(struct task_struct *tsk, | ||
274 | struct hw_breakpoint *bp) | ||
275 | { | ||
276 | struct thread_struct *thread = &(tsk->thread); | ||
277 | int i, pos = -1, hbp_counter = 0; | ||
278 | |||
279 | spin_lock_bh(&hw_breakpoint_lock); | ||
280 | for (i = 0; i < hbp_kernel_pos; i++) { | ||
281 | if (thread->hbp[i]) | ||
282 | hbp_counter++; | ||
283 | if (bp == thread->hbp[i]) | ||
284 | pos = i; | ||
285 | } | ||
286 | if (pos >= 0) { | ||
287 | __unregister_user_hw_breakpoint(pos, tsk); | ||
288 | hbp_counter--; | ||
289 | } | ||
290 | if (!hbp_counter) | ||
291 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | ||
292 | |||
293 | spin_unlock_bh(&hw_breakpoint_lock); | ||
294 | } | ||
295 | EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); | ||
296 | |||
297 | /** | ||
298 | * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space | ||
299 | * @bp: the breakpoint structure to register | ||
300 | * | ||
301 | * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and | ||
302 | * @bp->triggered must be set properly before invocation | ||
303 | * | ||
304 | */ | ||
305 | int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) | ||
306 | { | ||
307 | int rc; | ||
308 | |||
309 | rc = arch_validate_hwbkpt_settings(bp, NULL); | ||
310 | if (rc) | ||
311 | return rc; | ||
312 | |||
313 | spin_lock_bh(&hw_breakpoint_lock); | ||
314 | |||
315 | rc = -ENOSPC; | ||
316 | /* Check if we are over-committing */ | ||
317 | if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { | ||
318 | hbp_kernel_pos--; | ||
319 | hbp_kernel[hbp_kernel_pos] = bp; | ||
320 | on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); | ||
321 | rc = 0; | ||
322 | } | ||
323 | |||
324 | spin_unlock_bh(&hw_breakpoint_lock); | ||
325 | return rc; | ||
326 | } | ||
327 | EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); | ||
328 | |||
329 | /** | ||
330 | * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space | ||
331 | * @bp: the breakpoint structure to unregister | ||
332 | * | ||
333 | * Uninstalls and unregisters @bp. | ||
334 | */ | ||
335 | void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) | ||
336 | { | ||
337 | int i, j; | ||
338 | |||
339 | spin_lock_bh(&hw_breakpoint_lock); | ||
340 | |||
341 | /* Find the 'bp' in our list of breakpoints for kernel */ | ||
342 | for (i = hbp_kernel_pos; i < HBP_NUM; i++) | ||
343 | if (bp == hbp_kernel[i]) | ||
344 | break; | ||
345 | |||
346 | /* Check if we did not find a match for 'bp'. If so return early */ | ||
347 | if (i == HBP_NUM) { | ||
348 | spin_unlock_bh(&hw_breakpoint_lock); | ||
349 | return; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * We'll shift the breakpoints one-level above to compact if | ||
354 | * unregistration creates a hole | ||
355 | */ | ||
356 | for (j = i; j > hbp_kernel_pos; j--) | ||
357 | hbp_kernel[j] = hbp_kernel[j-1]; | ||
358 | |||
359 | hbp_kernel[hbp_kernel_pos] = NULL; | ||
360 | on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); | ||
361 | hbp_kernel_pos++; | ||
362 | |||
363 | spin_unlock_bh(&hw_breakpoint_lock); | ||
364 | } | ||
365 | EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); | ||
366 | |||
367 | static struct notifier_block hw_breakpoint_exceptions_nb = { | ||
368 | .notifier_call = hw_breakpoint_exceptions_notify, | ||
369 | /* we need to be notified first */ | ||
370 | .priority = 0x7fffffff | ||
371 | }; | ||
372 | |||
373 | static int __init init_hw_breakpoint(void) | ||
374 | { | ||
375 | return register_die_notifier(&hw_breakpoint_exceptions_nb); | ||
376 | } | ||
377 | |||
378 | core_initcall(init_hw_breakpoint); | ||
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 61071fecc82e..ae048a2dbbe8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -314,6 +314,27 @@ config POWER_TRACER | |||
314 | power management decisions, specifically the C-state and P-state | 314 | power management decisions, specifically the C-state and P-state |
315 | behavior. | 315 | behavior. |
316 | 316 | ||
317 | config KSYM_TRACER | ||
318 | bool "Trace read and write access on kernel memory locations" | ||
319 | depends on HAVE_HW_BREAKPOINT | ||
320 | select TRACING | ||
321 | help | ||
322 | This tracer helps find read and write operations on any given kernel | ||
323 | symbol, i.e. any symbol listed in /proc/kallsyms. | ||
324 | |||
325 | config PROFILE_KSYM_TRACER | ||
326 | bool "Profile all kernel memory accesses on 'watched' variables" | ||
327 | depends on KSYM_TRACER | ||
328 | help | ||
329 | This tracer profiles kernel accesses on variables watched through the | ||
330 | ksym tracer ftrace plugin. Depending upon the hardware, all read | ||
331 | and write operations on kernel variables can be monitored for | ||
332 | accesses. | ||
333 | |||
334 | The results will be displayed in: | ||
335 | /debugfs/tracing/profile_ksym | ||
336 | |||
337 | Say N if unsure. | ||
317 | 338 | ||
318 | config STACK_TRACER | 339 | config STACK_TRACER |
319 | bool "Trace max stack" | 340 | bool "Trace max stack" |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 844164dca90a..ce3b1cd02732 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -54,5 +54,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o | |||
54 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 54 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
55 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 55 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o |
56 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 56 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
57 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o | ||
57 | 58 | ||
58 | libftrace-y := ftrace.o | 59 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6e735d4771f8..7d5cc37b8fca 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -15,6 +15,10 @@ | |||
15 | #include <linux/trace_seq.h> | 15 | #include <linux/trace_seq.h> |
16 | #include <linux/ftrace_event.h> | 16 | #include <linux/ftrace_event.h> |
17 | 17 | ||
18 | #ifdef CONFIG_KSYM_TRACER | ||
19 | #include <asm/hw_breakpoint.h> | ||
20 | #endif | ||
21 | |||
18 | enum trace_type { | 22 | enum trace_type { |
19 | __TRACE_FIRST_TYPE = 0, | 23 | __TRACE_FIRST_TYPE = 0, |
20 | 24 | ||
@@ -40,6 +44,7 @@ enum trace_type { | |||
40 | TRACE_KMEM_FREE, | 44 | TRACE_KMEM_FREE, |
41 | TRACE_POWER, | 45 | TRACE_POWER, |
42 | TRACE_BLK, | 46 | TRACE_BLK, |
47 | TRACE_KSYM, | ||
43 | 48 | ||
44 | __TRACE_LAST_TYPE, | 49 | __TRACE_LAST_TYPE, |
45 | }; | 50 | }; |
@@ -207,6 +212,21 @@ struct syscall_trace_exit { | |||
207 | unsigned long ret; | 212 | unsigned long ret; |
208 | }; | 213 | }; |
209 | 214 | ||
215 | #define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" | ||
216 | extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); | ||
217 | |||
218 | struct trace_ksym { | ||
219 | struct trace_entry ent; | ||
220 | struct hw_breakpoint *ksym_hbp; | ||
221 | unsigned long ksym_addr; | ||
222 | unsigned long ip; | ||
223 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
224 | unsigned long counter; | ||
225 | #endif | ||
226 | struct hlist_node ksym_hlist; | ||
227 | char ksym_name[KSYM_NAME_LEN]; | ||
228 | char p_name[TASK_COMM_LEN]; | ||
229 | }; | ||
210 | 230 | ||
211 | /* | 231 | /* |
212 | * trace_flag_type is an enumeration that holds different | 232 | * trace_flag_type is an enumeration that holds different |
@@ -323,6 +343,7 @@ extern void __ftrace_bad_type(void); | |||
323 | TRACE_SYSCALL_ENTER); \ | 343 | TRACE_SYSCALL_ENTER); \ |
324 | IF_ASSIGN(var, ent, struct syscall_trace_exit, \ | 344 | IF_ASSIGN(var, ent, struct syscall_trace_exit, \ |
325 | TRACE_SYSCALL_EXIT); \ | 345 | TRACE_SYSCALL_EXIT); \ |
346 | IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \ | ||
326 | __ftrace_bad_type(); \ | 347 | __ftrace_bad_type(); \ |
327 | } while (0) | 348 | } while (0) |
328 | 349 | ||
@@ -540,6 +561,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, | |||
540 | struct trace_array *tr); | 561 | struct trace_array *tr); |
541 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, | 562 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, |
542 | struct trace_array *tr); | 563 | struct trace_array *tr); |
564 | extern int trace_selftest_startup_ksym(struct tracer *trace, | ||
565 | struct trace_array *tr); | ||
543 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 566 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
544 | 567 | ||
545 | extern void *head_page(struct trace_array_cpu *data); | 568 | extern void *head_page(struct trace_array_cpu *data); |
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c new file mode 100644 index 000000000000..eef97e7c8db7 --- /dev/null +++ b/kernel/trace/trace_ksym.c | |||
@@ -0,0 +1,525 @@ | |||
1 | /* | ||
2 | * trace_ksym.c - Kernel Symbol Tracer | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/kallsyms.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/debugfs.h> | ||
24 | #include <linux/ftrace.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #include "trace_output.h" | ||
29 | #include "trace_stat.h" | ||
30 | #include "trace.h" | ||
31 | |||
/*
 * For now, the number of symbols traced simultaneously is restricted to the
 * number of available hardware breakpoint registers.
 */
#define KSYM_TRACER_MAX		HBP_NUM

#define KSYM_TRACER_OP_LEN	3 /* rw- */
/* Bytes budgeted per filter entry: symbol name plus the operation string */
#define KSYM_FILTER_ENTRY_LEN	(KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)

/* Trace array the handler logs into; set by ksym_trace_init() */
static struct trace_array *ksym_trace_array;

/* Number of entries currently linked on ksym_filter_head */
static unsigned int ksym_filter_entry_count;
/* Non-zero while the tracer is active; checked by the breakpoint handler */
static unsigned int ksym_tracing_enabled;

/* List of 'struct trace_ksym' entries, one per watched symbol */
static HLIST_HEAD(ksym_filter_head);

/* Held by the filter read/write and reset paths while using the list */
static DEFINE_MUTEX(ksym_tracer_mutex);
48 | |||
#ifdef CONFIG_PROFILE_KSYM_TRACER

/* Ceiling at which a per-symbol hit counter stops counting */
#define MAX_UL_INT 0xffffffff

/*
 * Account one hit for the watched symbol located at 'hbp_hit_addr'.
 *
 * The filter list is walked under rcu_read_lock(). The counter of the
 * matching entry is incremented only while it is strictly below MAX_UL_INT,
 * so it saturates at that value. The former '<=' test still incremented at
 * the limit, wrapping a 32-bit 'unsigned long' back to zero.
 */
void ksym_collect_stats(unsigned long hbp_hit_addr)
{
	struct hlist_node *node;
	struct trace_ksym *entry;

	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
		if (entry->ksym_addr != hbp_hit_addr)
			continue;

		if (entry->counter < MAX_UL_INT)
			entry->counter++;
		/* The filter-write path keeps addresses unique: stop here */
		break;
	}
	rcu_read_unlock();
}
#endif /* CONFIG_PROFILE_KSYM_TRACER */
69 | |||
70 | void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) | ||
71 | { | ||
72 | struct ring_buffer_event *event; | ||
73 | struct trace_array *tr; | ||
74 | struct trace_ksym *entry; | ||
75 | int pc; | ||
76 | |||
77 | if (!ksym_tracing_enabled) | ||
78 | return; | ||
79 | |||
80 | tr = ksym_trace_array; | ||
81 | pc = preempt_count(); | ||
82 | |||
83 | event = trace_buffer_lock_reserve(tr, TRACE_KSYM, | ||
84 | sizeof(*entry), 0, pc); | ||
85 | if (!event) | ||
86 | return; | ||
87 | |||
88 | entry = ring_buffer_event_data(event); | ||
89 | strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); | ||
90 | entry->ksym_hbp = hbp; | ||
91 | entry->ip = instruction_pointer(regs); | ||
92 | strlcpy(entry->p_name, current->comm, TASK_COMM_LEN); | ||
93 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
94 | ksym_collect_stats(hbp->info.address); | ||
95 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
96 | |||
97 | trace_buffer_unlock_commit(tr, event, 0, pc); | ||
98 | } | ||
99 | |||
100 | /* Valid access types are represented as | ||
101 | * | ||
102 | * rw- : Set Read/Write Access Breakpoint | ||
103 | * -w- : Set Write Access Breakpoint | ||
104 | * --- : Clear Breakpoints | ||
105 | * --x : Set Execution Break points (Not available yet) | ||
106 | * | ||
107 | */ | ||
108 | static int ksym_trace_get_access_type(char *access_str) | ||
109 | { | ||
110 | int pos, access = 0; | ||
111 | |||
112 | for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) { | ||
113 | switch (access_str[pos]) { | ||
114 | case 'r': | ||
115 | access += (pos == 0) ? 4 : -1; | ||
116 | break; | ||
117 | case 'w': | ||
118 | access += (pos == 1) ? 2 : -1; | ||
119 | break; | ||
120 | case '-': | ||
121 | break; | ||
122 | default: | ||
123 | return -EINVAL; | ||
124 | } | ||
125 | } | ||
126 | |||
127 | switch (access) { | ||
128 | case 6: | ||
129 | access = HW_BREAKPOINT_RW; | ||
130 | break; | ||
131 | case 2: | ||
132 | access = HW_BREAKPOINT_WRITE; | ||
133 | break; | ||
134 | case 0: | ||
135 | access = 0; | ||
136 | } | ||
137 | |||
138 | return access; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * There can be several possible malformed requests and we attempt to capture | ||
143 | * all of them. We enumerate some of the rules | ||
144 | * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. | ||
145 | * i.e. multiple ':' symbols disallowed. Possible uses are of the form | ||
146 | * <module>:<ksym_name>:<op>. | ||
147 | * 2. No delimiter symbol ':' in the input string | ||
148 | * 3. Spurious operator symbols or symbols not in their respective positions | ||
149 | * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file | ||
150 | * 5. Kernel symbol not a part of /proc/kallsyms | ||
151 | * 6. Duplicate requests | ||
152 | */ | ||
153 | static int parse_ksym_trace_str(char *input_string, char **ksymname, | ||
154 | unsigned long *addr) | ||
155 | { | ||
156 | char *delimiter = ":"; | ||
157 | int ret; | ||
158 | |||
159 | ret = -EINVAL; | ||
160 | *ksymname = strsep(&input_string, delimiter); | ||
161 | *addr = kallsyms_lookup_name(*ksymname); | ||
162 | |||
163 | /* Check for malformed request: (2), (1) and (5) */ | ||
164 | if ((!input_string) || | ||
165 | (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) || | ||
166 | (*addr == 0)) | ||
167 | goto return_code; | ||
168 | ret = ksym_trace_get_access_type(input_string); | ||
169 | |||
170 | return_code: | ||
171 | return ret; | ||
172 | } | ||
173 | |||
174 | int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) | ||
175 | { | ||
176 | struct trace_ksym *entry; | ||
177 | int ret; | ||
178 | |||
179 | if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { | ||
180 | printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" | ||
181 | " new requests for tracing can be accepted now.\n", | ||
182 | KSYM_TRACER_MAX); | ||
183 | return -ENOSPC; | ||
184 | } | ||
185 | |||
186 | entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); | ||
187 | if (!entry) | ||
188 | return -ENOMEM; | ||
189 | |||
190 | entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); | ||
191 | if (!entry->ksym_hbp) { | ||
192 | kfree(entry); | ||
193 | return -ENOMEM; | ||
194 | } | ||
195 | |||
196 | entry->ksym_hbp->info.name = ksymname; | ||
197 | entry->ksym_hbp->info.type = op; | ||
198 | entry->ksym_addr = entry->ksym_hbp->info.address = addr; | ||
199 | #ifdef CONFIG_X86 | ||
200 | entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; | ||
201 | #endif | ||
202 | entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; | ||
203 | |||
204 | ret = register_kernel_hw_breakpoint(entry->ksym_hbp); | ||
205 | if (ret < 0) { | ||
206 | printk(KERN_INFO "ksym_tracer request failed. Try again" | ||
207 | " later!!\n"); | ||
208 | kfree(entry->ksym_hbp); | ||
209 | kfree(entry); | ||
210 | return -EAGAIN; | ||
211 | } | ||
212 | hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); | ||
213 | ksym_filter_entry_count++; | ||
214 | |||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, | ||
219 | size_t count, loff_t *ppos) | ||
220 | { | ||
221 | struct trace_ksym *entry; | ||
222 | struct hlist_node *node; | ||
223 | char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX]; | ||
224 | ssize_t ret, cnt = 0; | ||
225 | |||
226 | mutex_lock(&ksym_tracer_mutex); | ||
227 | |||
228 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | ||
229 | cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:", | ||
230 | entry->ksym_hbp->info.name); | ||
231 | if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) | ||
232 | cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, | ||
233 | "-w-\n"); | ||
234 | else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) | ||
235 | cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, | ||
236 | "rw-\n"); | ||
237 | } | ||
238 | ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf)); | ||
239 | mutex_unlock(&ksym_tracer_mutex); | ||
240 | |||
241 | return ret; | ||
242 | } | ||
243 | |||
244 | static ssize_t ksym_trace_filter_write(struct file *file, | ||
245 | const char __user *buffer, | ||
246 | size_t count, loff_t *ppos) | ||
247 | { | ||
248 | struct trace_ksym *entry; | ||
249 | struct hlist_node *node; | ||
250 | char *input_string, *ksymname = NULL; | ||
251 | unsigned long ksym_addr = 0; | ||
252 | int ret, op, changed = 0; | ||
253 | |||
254 | /* Ignore echo "" > ksym_trace_filter */ | ||
255 | if (count == 0) | ||
256 | return 0; | ||
257 | |||
258 | input_string = kzalloc(count, GFP_KERNEL); | ||
259 | if (!input_string) | ||
260 | return -ENOMEM; | ||
261 | |||
262 | if (copy_from_user(input_string, buffer, count)) { | ||
263 | kfree(input_string); | ||
264 | return -EFAULT; | ||
265 | } | ||
266 | |||
267 | ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); | ||
268 | if (ret < 0) { | ||
269 | kfree(input_string); | ||
270 | return ret; | ||
271 | } | ||
272 | |||
273 | mutex_lock(&ksym_tracer_mutex); | ||
274 | |||
275 | ret = -EINVAL; | ||
276 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | ||
277 | if (entry->ksym_addr == ksym_addr) { | ||
278 | /* Check for malformed request: (6) */ | ||
279 | if (entry->ksym_hbp->info.type != op) | ||
280 | changed = 1; | ||
281 | else | ||
282 | goto err_ret; | ||
283 | break; | ||
284 | } | ||
285 | } | ||
286 | if (changed) { | ||
287 | unregister_kernel_hw_breakpoint(entry->ksym_hbp); | ||
288 | entry->ksym_hbp->info.type = op; | ||
289 | if (op > 0) { | ||
290 | ret = register_kernel_hw_breakpoint(entry->ksym_hbp); | ||
291 | if (ret == 0) { | ||
292 | ret = count; | ||
293 | goto unlock_ret_path; | ||
294 | } | ||
295 | } | ||
296 | ksym_filter_entry_count--; | ||
297 | hlist_del_rcu(&(entry->ksym_hlist)); | ||
298 | synchronize_rcu(); | ||
299 | kfree(entry->ksym_hbp); | ||
300 | kfree(entry); | ||
301 | ret = count; | ||
302 | goto err_ret; | ||
303 | } else { | ||
304 | /* Check for malformed request: (4) */ | ||
305 | if (op == 0) | ||
306 | goto err_ret; | ||
307 | ret = process_new_ksym_entry(ksymname, op, ksym_addr); | ||
308 | if (ret) | ||
309 | goto err_ret; | ||
310 | } | ||
311 | ret = count; | ||
312 | goto unlock_ret_path; | ||
313 | |||
314 | err_ret: | ||
315 | kfree(input_string); | ||
316 | |||
317 | unlock_ret_path: | ||
318 | mutex_unlock(&ksym_tracer_mutex); | ||
319 | return ret; | ||
320 | } | ||
321 | |||
322 | static const struct file_operations ksym_tracing_fops = { | ||
323 | .open = tracing_open_generic, | ||
324 | .read = ksym_trace_filter_read, | ||
325 | .write = ksym_trace_filter_write, | ||
326 | }; | ||
327 | |||
328 | static void ksym_trace_reset(struct trace_array *tr) | ||
329 | { | ||
330 | struct trace_ksym *entry; | ||
331 | struct hlist_node *node, *node1; | ||
332 | |||
333 | ksym_tracing_enabled = 0; | ||
334 | |||
335 | mutex_lock(&ksym_tracer_mutex); | ||
336 | hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, | ||
337 | ksym_hlist) { | ||
338 | unregister_kernel_hw_breakpoint(entry->ksym_hbp); | ||
339 | ksym_filter_entry_count--; | ||
340 | hlist_del_rcu(&(entry->ksym_hlist)); | ||
341 | synchronize_rcu(); | ||
342 | /* Free the 'input_string' only if reset | ||
343 | * after startup self-test | ||
344 | */ | ||
345 | #ifdef CONFIG_FTRACE_SELFTEST | ||
346 | if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY, | ||
347 | strlen(KSYM_SELFTEST_ENTRY)) != 0) | ||
348 | #endif /* CONFIG_FTRACE_SELFTEST*/ | ||
349 | kfree(entry->ksym_hbp->info.name); | ||
350 | kfree(entry->ksym_hbp); | ||
351 | kfree(entry); | ||
352 | } | ||
353 | mutex_unlock(&ksym_tracer_mutex); | ||
354 | } | ||
355 | |||
356 | static int ksym_trace_init(struct trace_array *tr) | ||
357 | { | ||
358 | int cpu, ret = 0; | ||
359 | |||
360 | for_each_online_cpu(cpu) | ||
361 | tracing_reset(tr, cpu); | ||
362 | ksym_tracing_enabled = 1; | ||
363 | ksym_trace_array = tr; | ||
364 | |||
365 | return ret; | ||
366 | } | ||
367 | |||
/*
 * ksym_trace_print_header - emit the two-line column header of the trace
 * output into @m.
 */
static void ksym_trace_print_header(struct seq_file *m)
{
	static const char titles[] =
		"# TASK-PID CPU# Symbol Type "
		"Function \n";
	static const char ticks[] =
		"# | | | | "
		"| \n";

	seq_puts(m, titles);
	seq_puts(m, ticks);
}
378 | |||
379 | static enum print_line_t ksym_trace_output(struct trace_iterator *iter) | ||
380 | { | ||
381 | struct trace_entry *entry = iter->ent; | ||
382 | struct trace_seq *s = &iter->seq; | ||
383 | struct trace_ksym *field; | ||
384 | char str[KSYM_SYMBOL_LEN]; | ||
385 | int ret; | ||
386 | |||
387 | if (entry->type != TRACE_KSYM) | ||
388 | return TRACE_TYPE_UNHANDLED; | ||
389 | |||
390 | trace_assign_type(field, entry); | ||
391 | |||
392 | ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name, | ||
393 | entry->pid, iter->cpu, field->ksym_name); | ||
394 | if (!ret) | ||
395 | return TRACE_TYPE_PARTIAL_LINE; | ||
396 | |||
397 | switch (field->ksym_hbp->info.type) { | ||
398 | case HW_BREAKPOINT_WRITE: | ||
399 | ret = trace_seq_printf(s, " W "); | ||
400 | break; | ||
401 | case HW_BREAKPOINT_RW: | ||
402 | ret = trace_seq_printf(s, " RW "); | ||
403 | break; | ||
404 | default: | ||
405 | return TRACE_TYPE_PARTIAL_LINE; | ||
406 | } | ||
407 | |||
408 | if (!ret) | ||
409 | return TRACE_TYPE_PARTIAL_LINE; | ||
410 | |||
411 | sprint_symbol(str, field->ip); | ||
412 | ret = trace_seq_printf(s, "%-20s\n", str); | ||
413 | if (!ret) | ||
414 | return TRACE_TYPE_PARTIAL_LINE; | ||
415 | |||
416 | return TRACE_TYPE_HANDLED; | ||
417 | } | ||
418 | |||
419 | struct tracer ksym_tracer __read_mostly = | ||
420 | { | ||
421 | .name = "ksym_tracer", | ||
422 | .init = ksym_trace_init, | ||
423 | .reset = ksym_trace_reset, | ||
424 | #ifdef CONFIG_FTRACE_SELFTEST | ||
425 | .selftest = trace_selftest_startup_ksym, | ||
426 | #endif | ||
427 | .print_header = ksym_trace_print_header, | ||
428 | .print_line = ksym_trace_output | ||
429 | }; | ||
430 | |||
431 | __init static int init_ksym_trace(void) | ||
432 | { | ||
433 | struct dentry *d_tracer; | ||
434 | struct dentry *entry; | ||
435 | |||
436 | d_tracer = tracing_init_dentry(); | ||
437 | ksym_filter_entry_count = 0; | ||
438 | |||
439 | entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, | ||
440 | NULL, &ksym_tracing_fops); | ||
441 | if (!entry) | ||
442 | pr_warning("Could not create debugfs " | ||
443 | "'ksym_trace_filter' file\n"); | ||
444 | |||
445 | return register_tracer(&ksym_tracer); | ||
446 | } | ||
447 | device_initcall(init_ksym_trace); | ||
448 | |||
449 | |||
450 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
/*
 * ksym_tracer_stat_headers - print the column titles of the stat output
 * into @m. Always returns 0.
 */
static int ksym_tracer_stat_headers(struct seq_file *m)
{
	seq_puts(m, " Access type ");
	seq_puts(m, " Symbol Counter \n");

	return 0;
}
457 | |||
458 | static int ksym_tracer_stat_show(struct seq_file *m, void *v) | ||
459 | { | ||
460 | struct hlist_node *stat = v; | ||
461 | struct trace_ksym *entry; | ||
462 | int access_type = 0; | ||
463 | char fn_name[KSYM_NAME_LEN]; | ||
464 | |||
465 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); | ||
466 | |||
467 | if (entry->ksym_hbp) | ||
468 | access_type = entry->ksym_hbp->info.type; | ||
469 | |||
470 | switch (access_type) { | ||
471 | case HW_BREAKPOINT_WRITE: | ||
472 | seq_printf(m, " W "); | ||
473 | break; | ||
474 | case HW_BREAKPOINT_RW: | ||
475 | seq_printf(m, " RW "); | ||
476 | break; | ||
477 | default: | ||
478 | seq_printf(m, " NA "); | ||
479 | } | ||
480 | |||
481 | if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) | ||
482 | seq_printf(m, " %s ", fn_name); | ||
483 | else | ||
484 | seq_printf(m, " <NA> "); | ||
485 | |||
486 | seq_printf(m, "%15lu\n", entry->counter); | ||
487 | return 0; | ||
488 | } | ||
489 | |||
490 | static void *ksym_tracer_stat_start(struct tracer_stat *trace) | ||
491 | { | ||
492 | return &(ksym_filter_head.first); | ||
493 | } | ||
494 | |||
495 | static void * | ||
496 | ksym_tracer_stat_next(void *v, int idx) | ||
497 | { | ||
498 | struct hlist_node *stat = v; | ||
499 | |||
500 | return stat->next; | ||
501 | } | ||
502 | |||
503 | static struct tracer_stat ksym_tracer_stats = { | ||
504 | .name = "ksym_tracer", | ||
505 | .stat_start = ksym_tracer_stat_start, | ||
506 | .stat_next = ksym_tracer_stat_next, | ||
507 | .stat_headers = ksym_tracer_stat_headers, | ||
508 | .stat_show = ksym_tracer_stat_show | ||
509 | }; | ||
510 | |||
511 | __init static int ksym_tracer_stat_init(void) | ||
512 | { | ||
513 | int ret; | ||
514 | |||
515 | ret = register_stat_tracer(&ksym_tracer_stats); | ||
516 | if (ret) { | ||
517 | printk(KERN_WARNING "Warning: could not register " | ||
518 | "ksym tracer stats\n"); | ||
519 | return 1; | ||
520 | } | ||
521 | |||
522 | return 0; | ||
523 | } | ||
524 | fs_initcall(ksym_tracer_stat_init); | ||
525 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 00dd6485bdd7..71f2edb0fd84 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) | |||
17 | case TRACE_GRAPH_ENT: | 17 | case TRACE_GRAPH_ENT: |
18 | case TRACE_GRAPH_RET: | 18 | case TRACE_GRAPH_RET: |
19 | case TRACE_HW_BRANCHES: | 19 | case TRACE_HW_BRANCHES: |
20 | case TRACE_KSYM: | ||
20 | return 1; | 21 | return 1; |
21 | } | 22 | } |
22 | return 0; | 23 | return 0; |
@@ -807,3 +808,55 @@ trace_selftest_startup_hw_branches(struct tracer *trace, | |||
807 | return ret; | 808 | return ret; |
808 | } | 809 | } |
809 | #endif /* CONFIG_HW_BRANCH_TRACER */ | 810 | #endif /* CONFIG_HW_BRANCH_TRACER */ |
811 | |||
812 | #ifdef CONFIG_KSYM_TRACER | ||
813 | static int ksym_selftest_dummy; | ||
814 | |||
815 | int | ||
816 | trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) | ||
817 | { | ||
818 | unsigned long count; | ||
819 | int ret; | ||
820 | |||
821 | /* start the tracing */ | ||
822 | ret = tracer_init(trace, tr); | ||
823 | if (ret) { | ||
824 | warn_failed_init_tracer(trace, ret); | ||
825 | return ret; | ||
826 | } | ||
827 | |||
828 | ksym_selftest_dummy = 0; | ||
829 | /* Register the read-write tracing request */ | ||
830 | ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, | ||
831 | (unsigned long)(&ksym_selftest_dummy)); | ||
832 | |||
833 | if (ret < 0) { | ||
834 | printk(KERN_CONT "ksym_trace read-write startup test failed\n"); | ||
835 | goto ret_path; | ||
836 | } | ||
837 | /* Perform a read and a write operation over the dummy variable to | ||
838 | * trigger the tracer | ||
839 | */ | ||
840 | if (ksym_selftest_dummy == 0) | ||
841 | ksym_selftest_dummy++; | ||
842 | |||
843 | /* stop the tracing. */ | ||
844 | tracing_stop(); | ||
845 | /* check the trace buffer */ | ||
846 | ret = trace_test_buffer(tr, &count); | ||
847 | trace->reset(tr); | ||
848 | tracing_start(); | ||
849 | |||
850 | /* read & write operations - one each is performed on the dummy variable | ||
851 | * triggering two entries in the trace buffer | ||
852 | */ | ||
853 | if (!ret && count != 2) { | ||
854 | printk(KERN_CONT "Ksym tracer startup test failed"); | ||
855 | ret = -1; | ||
856 | } | ||
857 | |||
858 | ret_path: | ||
859 | return ret; | ||
860 | } | ||
861 | #endif /* CONFIG_KSYM_TRACER */ | ||
862 | |||
diff --git a/samples/Kconfig b/samples/Kconfig index 428b065ba695..17d64ba7864c 100644 --- a/samples/Kconfig +++ b/samples/Kconfig | |||
@@ -46,5 +46,11 @@ config SAMPLE_KRETPROBES | |||
46 | default m | 46 | default m |
47 | depends on SAMPLE_KPROBES && KRETPROBES | 47 | depends on SAMPLE_KPROBES && KRETPROBES |
48 | 48 | ||
49 | config SAMPLE_HW_BREAKPOINT | ||
50 | tristate "Build kernel hardware breakpoint examples -- loadable module only" | ||
51 | depends on HAVE_HW_BREAKPOINT && m | ||
52 | help | ||
53 | This builds kernel hardware breakpoint example modules. | ||
54 | |||
49 | endif # SAMPLES | 55 | endif # SAMPLES |
50 | 56 | ||
diff --git a/samples/Makefile b/samples/Makefile index 13e4b470b539..42e175598777 100644 --- a/samples/Makefile +++ b/samples/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | # Makefile for Linux samples code | 1 | # Makefile for Linux samples code |
2 | 2 | ||
3 | obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ | 3 | obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \ |
4 | trace_events/ hw_breakpoint/ | ||
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile new file mode 100644 index 000000000000..0f5c31c2fc47 --- /dev/null +++ b/samples/hw_breakpoint/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o | |||
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c new file mode 100644 index 000000000000..9cbdbb871b7a --- /dev/null +++ b/samples/hw_breakpoint/data_breakpoint.c | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * usage: insmod data_breakpoint.ko ksym=<ksym_name> | ||
19 | * | ||
20 | * This file is a kernel module that places a breakpoint over ksym_name kernel | ||
21 | * variable using Hardware Breakpoint register. The corresponding handler which | ||
22 | * prints a backtrace is invoked every time a write operation is performed on | ||
23 | * that variable. | ||
24 | * | ||
25 | * Copyright (C) IBM Corporation, 2009 | ||
26 | */ | ||
27 | #include <linux/module.h> /* Needed by all modules */ | ||
28 | #include <linux/kernel.h> /* Needed for KERN_INFO */ | ||
29 | #include <linux/init.h> /* Needed for the macros */ | ||
30 | |||
31 | #include <asm/hw_breakpoint.h> | ||
32 | |||
33 | struct hw_breakpoint sample_hbp; | ||
34 | |||
35 | static char ksym_name[KSYM_NAME_LEN] = "pid_max"; | ||
36 | module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); | ||
37 | MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" | ||
38 | " write operations on the kernel symbol"); | ||
39 | |||
40 | void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs | ||
41 | *temp_regs) | ||
42 | { | ||
43 | printk(KERN_INFO "%s value is changed\n", ksym_name); | ||
44 | dump_stack(); | ||
45 | printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); | ||
46 | } | ||
47 | |||
48 | static int __init hw_break_module_init(void) | ||
49 | { | ||
50 | int ret; | ||
51 | |||
52 | #ifdef CONFIG_X86 | ||
53 | sample_hbp.info.name = ksym_name; | ||
54 | sample_hbp.info.type = HW_BREAKPOINT_WRITE; | ||
55 | sample_hbp.info.len = HW_BREAKPOINT_LEN_4; | ||
56 | #endif /* CONFIG_X86 */ | ||
57 | |||
58 | sample_hbp.triggered = (void *)sample_hbp_handler; | ||
59 | |||
60 | ret = register_kernel_hw_breakpoint(&sample_hbp); | ||
61 | |||
62 | if (ret < 0) { | ||
63 | printk(KERN_INFO "Breakpoint registration failed\n"); | ||
64 | return ret; | ||
65 | } else | ||
66 | printk(KERN_INFO "HW Breakpoint for %s write installed\n", | ||
67 | ksym_name); | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static void __exit hw_break_module_exit(void) | ||
73 | { | ||
74 | unregister_kernel_hw_breakpoint(&sample_hbp); | ||
75 | printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); | ||
76 | } | ||
77 | |||
78 | module_init(hw_break_module_init); | ||
79 | module_exit(hw_break_module_exit); | ||
80 | |||
81 | MODULE_LICENSE("GPL"); | ||
82 | MODULE_AUTHOR("K.Prasad"); | ||
83 | MODULE_DESCRIPTION("ksym breakpoint"); | ||