aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/Kconfig4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/a.out-core.h8
-rw-r--r--arch/x86/include/asm/debugreg.h29
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h55
-rw-r--r--arch/x86/include/asm/processor.h8
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c391
-rw-r--r--arch/x86/kernel/kgdb.c6
-rw-r--r--arch/x86/kernel/kprobes.c9
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/process.c22
-rw-r--r--arch/x86/kernel/process_32.c28
-rw-r--r--arch/x86/kernel/process_64.c31
-rw-r--r--arch/x86/kernel/ptrace.c231
-rw-r--r--arch/x86/kernel/signal.c9
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/traps.c73
-rw-r--r--arch/x86/mm/kmmio.c8
-rw-r--r--arch/x86/power/cpu.c24
-rw-r--r--include/asm-generic/hw_breakpoint.h139
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/hw_breakpoint.c378
-rw-r--r--kernel/trace/Kconfig21
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/trace.h18
-rw-r--r--kernel/trace/trace_ksym.c550
-rw-r--r--kernel/trace/trace_selftest.c53
-rw-r--r--samples/Kconfig6
-rw-r--r--samples/Makefile3
-rw-r--r--samples/hw_breakpoint/Makefile1
-rw-r--r--samples/hw_breakpoint/data_breakpoint.c83
33 files changed, 2005 insertions, 195 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 99193b16023..c72f18fde31 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -113,4 +113,8 @@ config HAVE_DMA_API_DEBUG
113config HAVE_DEFAULT_NO_SPIN_MUTEXES 113config HAVE_DEFAULT_NO_SPIN_MUTEXES
114 bool 114 bool
115 115
116config HAVE_HW_BREAKPOINT
117 bool
118
119
116source "kernel/gcov/Kconfig" 120source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 071c6b0e760..f46f30d23eb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
49 select HAVE_KERNEL_GZIP 49 select HAVE_KERNEL_GZIP
50 select HAVE_KERNEL_BZIP2 50 select HAVE_KERNEL_BZIP2
51 select HAVE_KERNEL_LZMA 51 select HAVE_KERNEL_LZMA
52 select HAVE_HW_BREAKPOINT
52 select HAVE_ARCH_KMEMCHECK 53 select HAVE_ARCH_KMEMCHECK
53 54
54config OUTPUT_FORMAT 55config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa8..fc4685dd6e4 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
32 >> PAGE_SHIFT; 32 >> PAGE_SHIFT;
33 dump->u_dsize -= dump->u_tsize; 33 dump->u_dsize -= dump->u_tsize;
34 dump->u_ssize = 0; 34 dump->u_ssize = 0;
35 dump->u_debugreg[0] = current->thread.debugreg0; 35 dump->u_debugreg[0] = current->thread.debugreg[0];
36 dump->u_debugreg[1] = current->thread.debugreg1; 36 dump->u_debugreg[1] = current->thread.debugreg[1];
37 dump->u_debugreg[2] = current->thread.debugreg2; 37 dump->u_debugreg[2] = current->thread.debugreg[2];
38 dump->u_debugreg[3] = current->thread.debugreg3; 38 dump->u_debugreg[3] = current->thread.debugreg[3];
39 dump->u_debugreg[4] = 0; 39 dump->u_debugreg[4] = 0;
40 dump->u_debugreg[5] = 0; 40 dump->u_debugreg[5] = 0;
41 dump->u_debugreg[6] = current->thread.debugreg6; 41 dump->u_debugreg[6] = current->thread.debugreg6;
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e..23439fbb1d0 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
18#define DR_TRAP1 (0x2) /* db1 */ 18#define DR_TRAP1 (0x2) /* db1 */
19#define DR_TRAP2 (0x4) /* db2 */ 19#define DR_TRAP2 (0x4) /* db2 */
20#define DR_TRAP3 (0x8) /* db3 */ 20#define DR_TRAP3 (0x8) /* db3 */
21#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
21 22
22#define DR_STEP (0x4000) /* single-step */ 23#define DR_STEP (0x4000) /* single-step */
23#define DR_SWITCH (0x8000) /* task switch */ 24#define DR_SWITCH (0x8000) /* task switch */
@@ -49,6 +50,8 @@
49 50
50#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ 51#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
51#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ 52#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
53#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
54#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
52#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ 55#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
53 56
54#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ 57#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +70,30 @@
67#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ 70#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
68#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ 71#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
69 72
73/*
74 * HW breakpoint additions
75 */
76#ifdef __KERNEL__
77
78/* For process management */
79extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
80extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
81 struct task_struct *child, unsigned long clone_flags);
82
83/* For CPU management */
84extern void load_debug_registers(void);
/*
 * Switch off all hardware breakpoints by clearing the debug registers.
 * DR7 (control) is cleared before the address registers DR0-DR3.
 */
static inline void hw_breakpoint_disable(void)
{
	int reg;

	/* Zero the control register for HW Breakpoint */
	set_debugreg(0UL, 7);

	/* Zero-out the individual HW breakpoint address registers */
	for (reg = 0; reg < 4; reg++)
		set_debugreg(0UL, reg);
}
96
97#endif /* __KERNEL__ */
98
70#endif /* _ASM_X86_DEBUGREG_H */ 99#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 00000000000..1acb4d45de7
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,55 @@
1#ifndef _I386_HW_BREAKPOINT_H
2#define _I386_HW_BREAKPOINT_H
3
4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H
6
7struct arch_hw_breakpoint {
8 char *name; /* Contains name of the symbol to set bkpt */
9 unsigned long address;
10 u8 len;
11 u8 type;
12};
13
14#include <linux/kdebug.h>
15#include <asm-generic/hw_breakpoint.h>
16
17/* Available HW breakpoint length encodings */
18#define HW_BREAKPOINT_LEN_1 0x40
19#define HW_BREAKPOINT_LEN_2 0x44
20#define HW_BREAKPOINT_LEN_4 0x4c
21#define HW_BREAKPOINT_LEN_EXECUTE 0x40
22
23#ifdef CONFIG_X86_64
24#define HW_BREAKPOINT_LEN_8 0x48
25#endif
26
27/* Available HW breakpoint type encodings */
28
29/* trigger on instruction execute */
30#define HW_BREAKPOINT_EXECUTE 0x80
31/* trigger on memory write */
32#define HW_BREAKPOINT_WRITE 0x81
33/* trigger on memory read or write */
34#define HW_BREAKPOINT_RW 0x83
35
36/* Total number of available HW breakpoint registers */
37#define HBP_NUM 4
38
39extern struct hw_breakpoint *hbp_kernel[HBP_NUM];
40DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
41extern unsigned int hbp_user_refcount[HBP_NUM];
42
43extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
44extern void arch_uninstall_thread_hw_breakpoint(void);
45extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
46extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
47 struct task_struct *tsk);
48extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
49extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
50extern void arch_update_kernel_hw_breakpoint(void *);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data);
53#endif /* __KERNEL__ */
54#endif /* _I386_HW_BREAKPOINT_H */
55
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7768269b1c..2b03f700d3f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -29,6 +29,7 @@ struct mm_struct;
29#include <linux/threads.h> 29#include <linux/threads.h>
30#include <linux/init.h> 30#include <linux/init.h>
31 31
32#define HBP_NUM 4
32/* 33/*
33 * Default implementation of macro that returns current 34 * Default implementation of macro that returns current
34 * instruction pointer ("program counter"). 35 * instruction pointer ("program counter").
@@ -433,12 +434,11 @@ struct thread_struct {
433#endif 434#endif
434 unsigned long gs; 435 unsigned long gs;
435 /* Hardware debugging registers: */ 436 /* Hardware debugging registers: */
436 unsigned long debugreg0; 437 unsigned long debugreg[HBP_NUM];
437 unsigned long debugreg1;
438 unsigned long debugreg2;
439 unsigned long debugreg3;
440 unsigned long debugreg6; 438 unsigned long debugreg6;
441 unsigned long debugreg7; 439 unsigned long debugreg7;
440 /* Hardware breakpoint info */
441 struct hw_breakpoint *hbp[HBP_NUM];
442 /* Fault info: */ 442 /* Fault info: */
443 unsigned long cr2; 443 unsigned long cr2;
444 unsigned long trap_no; 444 unsigned long trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 430d5b24af7..bf04201b657 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
40obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 40obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
41obj-y += bootflag.o e820.o 41obj-y += bootflag.o e820.o
42obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 42obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
43obj-y += alternative.o i8253.o pci-nommu.o 43obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
44obj-y += tsc.o io_delay.o rtc.o 44obj-y += tsc.o io_delay.o rtc.o
45 45
46obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 46obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 00000000000..9316a9de4de
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,391 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation
18 */
19
20/*
21 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
22 * using the CPU's debug registers.
23 */
24
25#include <linux/irqflags.h>
26#include <linux/notifier.h>
27#include <linux/kallsyms.h>
28#include <linux/kprobes.h>
29#include <linux/percpu.h>
30#include <linux/kdebug.h>
31#include <linux/kernel.h>
32#include <linux/module.h>
33#include <linux/sched.h>
34#include <linux/init.h>
35#include <linux/smp.h>
36
37#include <asm/hw_breakpoint.h>
38#include <asm/processor.h>
39#include <asm/debugreg.h>
40
41/* Unmasked kernel DR7 value */
42static unsigned long kdr7;
43
44/*
45 * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
46 * Used to clear and verify the status of bits corresponding to DR0 - DR3
47 */
48static const unsigned long dr7_masks[HBP_NUM] = {
49 0x000f0003, /* LEN0, R/W0, G0, L0 */
50 0x00f0000c, /* LEN1, R/W1, G1, L1 */
51 0x0f000030, /* LEN2, R/W2, G2, L2 */
52 0xf00000c0 /* LEN3, R/W3, G3, L3 */
53};
54
55
56/*
57 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
58 * as stored in debug register 7.
59 */
60static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
61{
62 unsigned long bp_info;
63
64 bp_info = (len | type) & 0xf;
65 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
66 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
67 DR_GLOBAL_SLOWDOWN;
68 return bp_info;
69}
70
71void arch_update_kernel_hw_breakpoint(void *unused)
72{
73 struct hw_breakpoint *bp;
74 int i, cpu = get_cpu();
75 unsigned long temp_kdr7 = 0;
76
77 /* Don't allow debug exceptions while we update the registers */
78 set_debugreg(0UL, 7);
79
80 for (i = hbp_kernel_pos; i < HBP_NUM; i++) {
81 per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
82 if (bp) {
83 temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
84 set_debugreg(bp->info.address, i);
85 }
86 }
87
88 /* No need to set DR6. Update the debug registers with kernel-space
89 * breakpoint values from kdr7 and user-space requests from the
90 * current process
91 */
92 kdr7 = temp_kdr7;
93 set_debugreg(kdr7 | current->thread.debugreg7, 7);
94 put_cpu();
95}
96
97/*
98 * Install the thread breakpoints in their debug registers.
99 */
100void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
101{
102 struct thread_struct *thread = &(tsk->thread);
103
104 switch (hbp_kernel_pos) {
105 case 4:
106 set_debugreg(thread->debugreg[3], 3);
107 case 3:
108 set_debugreg(thread->debugreg[2], 2);
109 case 2:
110 set_debugreg(thread->debugreg[1], 1);
111 case 1:
112 set_debugreg(thread->debugreg[0], 0);
113 default:
114 break;
115 }
116
117 /* No need to set DR6 */
118 set_debugreg((kdr7 | thread->debugreg7), 7);
119}
120
121/*
122 * Install the debug register values for just the kernel, no thread.
123 */
124void arch_uninstall_thread_hw_breakpoint(void)
125{
126 /* Clear the user-space portion of debugreg7 by setting only kdr7 */
127 set_debugreg(kdr7, 7);
128
129}
130
131static int get_hbp_len(u8 hbp_len)
132{
133 unsigned int len_in_bytes = 0;
134
135 switch (hbp_len) {
136 case HW_BREAKPOINT_LEN_1:
137 len_in_bytes = 1;
138 break;
139 case HW_BREAKPOINT_LEN_2:
140 len_in_bytes = 2;
141 break;
142 case HW_BREAKPOINT_LEN_4:
143 len_in_bytes = 4;
144 break;
145#ifdef CONFIG_X86_64
146 case HW_BREAKPOINT_LEN_8:
147 len_in_bytes = 8;
148 break;
149#endif
150 }
151 return len_in_bytes;
152}
153
154/*
155 * Check for virtual address in user space.
156 */
157int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
158{
159 unsigned int len;
160
161 len = get_hbp_len(hbp_len);
162
163 return (va <= TASK_SIZE - len);
164}
165
166/*
167 * Check for virtual address in kernel space.
168 */
169static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
170{
171 unsigned int len;
172
173 len = get_hbp_len(hbp_len);
174
175 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
176}
177
178/*
179 * Store a breakpoint's encoded address, length, and type.
180 */
181static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
182{
183 /*
184 * User-space requests will always have the address field populated
185 * Symbol names from user-space are rejected
186 */
187 if (tsk && bp->info.name)
188 return -EINVAL;
189 /*
190 * For kernel-addresses, either the address or symbol name can be
191 * specified.
192 */
193 if (bp->info.name)
194 bp->info.address = (unsigned long)
195 kallsyms_lookup_name(bp->info.name);
196 if (bp->info.address)
197 return 0;
198 return -EINVAL;
199}
200
201/*
202 * Validate the arch-specific HW Breakpoint register settings
203 */
204int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
205 struct task_struct *tsk)
206{
207 unsigned int align;
208 int ret = -EINVAL;
209
210 switch (bp->info.type) {
211 /*
212 * Ptrace-refactoring code
213 * For now, we'll allow instruction breakpoint only for user-space
214 * addresses
215 */
216 case HW_BREAKPOINT_EXECUTE:
217 if ((!arch_check_va_in_userspace(bp->info.address,
218 bp->info.len)) &&
219 bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
220 return ret;
221 break;
222 case HW_BREAKPOINT_WRITE:
223 break;
224 case HW_BREAKPOINT_RW:
225 break;
226 default:
227 return ret;
228 }
229
230 switch (bp->info.len) {
231 case HW_BREAKPOINT_LEN_1:
232 align = 0;
233 break;
234 case HW_BREAKPOINT_LEN_2:
235 align = 1;
236 break;
237 case HW_BREAKPOINT_LEN_4:
238 align = 3;
239 break;
240#ifdef CONFIG_X86_64
241 case HW_BREAKPOINT_LEN_8:
242 align = 7;
243 break;
244#endif
245 default:
246 return ret;
247 }
248
249 if (bp->triggered)
250 ret = arch_store_info(bp, tsk);
251
252 if (ret < 0)
253 return ret;
254 /*
255 * Check that the low-order bits of the address are appropriate
256 * for the alignment implied by len.
257 */
258 if (bp->info.address & align)
259 return -EINVAL;
260
261 /* Check that the virtual address is in the proper range */
262 if (tsk) {
263 if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
264 return -EFAULT;
265 } else {
266 if (!arch_check_va_in_kernelspace(bp->info.address,
267 bp->info.len))
268 return -EFAULT;
269 }
270 return 0;
271}
272
273void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
274{
275 struct thread_struct *thread = &(tsk->thread);
276 struct hw_breakpoint *bp = thread->hbp[pos];
277
278 thread->debugreg7 &= ~dr7_masks[pos];
279 if (bp) {
280 thread->debugreg[pos] = bp->info.address;
281 thread->debugreg7 |= encode_dr7(pos, bp->info.len,
282 bp->info.type);
283 } else
284 thread->debugreg[pos] = 0;
285}
286
287void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
288{
289 int i;
290 struct thread_struct *thread = &(tsk->thread);
291
292 thread->debugreg7 = 0;
293 for (i = 0; i < HBP_NUM; i++)
294 thread->debugreg[i] = 0;
295}
296
297/*
298 * Handle debug exception notifications.
299 *
300 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
301 *
302 * NOTIFY_DONE returned if one of the following conditions is true.
303 * i) When the causative address is from user-space and the exception
304 * is a valid one, i.e. not triggered as a result of lazy debug register
305 * switching
306 * ii) When there are more bits than trap<n> set in DR6 register (such
307 * as BD, BS or BT) indicating that more than one debug condition is
308 * met and requires some more action in do_debug().
309 *
310 * NOTIFY_STOP returned for all other cases
311 *
312 */
313static int __kprobes hw_breakpoint_handler(struct die_args *args)
314{
315 int i, cpu, rc = NOTIFY_STOP;
316 struct hw_breakpoint *bp;
317 unsigned long dr7, dr6;
318 unsigned long *dr6_p;
319
320 /* The DR6 value is pointed by args->err */
321 dr6_p = (unsigned long *)ERR_PTR(args->err);
322 dr6 = *dr6_p;
323
324 /* Do an early return if no trap bits are set in DR6 */
325 if ((dr6 & DR_TRAP_BITS) == 0)
326 return NOTIFY_DONE;
327
328 /* Lazy debug register switching */
329 if (!test_tsk_thread_flag(current, TIF_DEBUG))
330 arch_uninstall_thread_hw_breakpoint();
331
332 get_debugreg(dr7, 7);
333 /* Disable breakpoints during exception handling */
334 set_debugreg(0UL, 7);
335 /*
336 * Assert that local interrupts are disabled
337 * Reset the DRn bits in the virtualized register value.
338 * The ptrace trigger routine will add in whatever is needed.
339 */
340 current->thread.debugreg6 &= ~DR_TRAP_BITS;
341 cpu = get_cpu();
342
343 /* Handle all the breakpoints that were triggered */
344 for (i = 0; i < HBP_NUM; ++i) {
345 if (likely(!(dr6 & (DR_TRAP0 << i))))
346 continue;
347 /*
348 * Find the corresponding hw_breakpoint structure and
349 * invoke its triggered callback.
350 */
351 if (i >= hbp_kernel_pos)
352 bp = per_cpu(this_hbp_kernel[i], cpu);
353 else {
354 bp = current->thread.hbp[i];
355 if (bp)
356 rc = NOTIFY_DONE;
357 }
358 /*
359 * Reset the 'i'th TRAP bit in dr6 to denote completion of
360 * exception handling
361 */
362 (*dr6_p) &= ~(DR_TRAP0 << i);
363 /*
364 * bp can be NULL due to lazy debug register switching
365 * or due to the delay between updates of hbp_kernel_pos
366 * and this_hbp_kernel.
367 */
368 if (!bp)
369 continue;
370
371 (bp->triggered)(bp, args->regs);
372 }
373 if (dr6 & (~DR_TRAP_BITS))
374 rc = NOTIFY_DONE;
375
376 set_debugreg(dr7, 7);
377 put_cpu();
378 return rc;
379}
380
381/*
382 * Handle debug exception notifications.
383 */
384int __kprobes hw_breakpoint_exceptions_notify(
385 struct notifier_block *unused, unsigned long val, void *data)
386{
387 if (val != DIE_DEBUG)
388 return NOTIFY_DONE;
389
390 return hw_breakpoint_handler(data);
391}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3..34e86b67550 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45 45
46#include <asm/debugreg.h>
46#include <asm/apicdef.h> 47#include <asm/apicdef.h>
47#include <asm/system.h> 48#include <asm/system.h>
48 49
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
434 "resuming...\n"); 435 "resuming...\n");
435 kgdb_arch_handle_exception(args->trapnr, args->signr, 436 kgdb_arch_handle_exception(args->trapnr, args->signr,
436 args->err, "c", "", regs); 437 args->err, "c", "", regs);
438 /*
439 * Reset the BS bit in dr6 (pointed by args->err) to
440 * denote completion of processing
441 */
442 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
437 443
438 return NOTIFY_STOP; 444 return NOTIFY_STOP;
439} 445}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b00..b5b1848c533 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -54,6 +54,7 @@
54#include <asm/pgtable.h> 54#include <asm/pgtable.h>
55#include <asm/uaccess.h> 55#include <asm/uaccess.h>
56#include <asm/alternative.h> 56#include <asm/alternative.h>
57#include <asm/debugreg.h>
57 58
58void jprobe_return_end(void); 59void jprobe_return_end(void);
59 60
@@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
967 ret = NOTIFY_STOP; 968 ret = NOTIFY_STOP;
968 break; 969 break;
969 case DIE_DEBUG: 970 case DIE_DEBUG:
970 if (post_kprobe_handler(args->regs)) 971 if (post_kprobe_handler(args->regs)) {
972 /*
973 * Reset the BS bit in dr6 (pointed by args->err) to
974 * denote completion of processing
975 */
976 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
971 ret = NOTIFY_STOP; 977 ret = NOTIFY_STOP;
978 }
972 break; 979 break;
973 case DIE_GPF: 980 case DIE_GPF:
974 /* 981 /*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d0013..c843f8406da 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/system.h> 26#include <asm/system.h>
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/debugreg.h>
28 29
29static void set_idt(void *newidt, __u16 limit) 30static void set_idt(void *newidt, __u16 limit)
30{ 31{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
202 203
203 /* Interrupts aren't acceptable while we reboot */ 204 /* Interrupts aren't acceptable while we reboot */
204 local_irq_disable(); 205 local_irq_disable();
206 hw_breakpoint_disable();
205 207
206 if (image->preserve_context) { 208 if (image->preserve_context) {
207#ifdef CONFIG_X86_IO_APIC 209#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e9..4a8bb82248a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21#include <asm/debugreg.h>
21 22
22static int init_one_level2_page(struct kimage *image, pgd_t *pgd, 23static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
23 unsigned long addr) 24 unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
282 283
283 /* Interrupts aren't acceptable while we reboot */ 284 /* Interrupts aren't acceptable while we reboot */
284 local_irq_disable(); 285 local_irq_disable();
286 hw_breakpoint_disable();
285 287
286 if (image->preserve_context) { 288 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC 289#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a4ba8..1092a1a2fbe 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <asm/i387.h> 18#include <asm/i387.h>
19#include <asm/ds.h> 19#include <asm/ds.h>
20#include <asm/debugreg.h>
21#include <asm/hw_breakpoint.h>
20 22
21unsigned long idle_halt; 23unsigned long idle_halt;
22EXPORT_SYMBOL(idle_halt); 24EXPORT_SYMBOL(idle_halt);
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_struct *tsk)
48 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 50 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
49 tsk->thread.xstate = NULL; 51 tsk->thread.xstate = NULL;
50 } 52 }
53 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
54 flush_thread_hw_breakpoint(tsk);
51 55
52 WARN(tsk->thread.ds_ctx, "leaking DS context\n"); 56 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
53} 57}
@@ -108,12 +112,8 @@ void flush_thread(void)
108 112
109 clear_tsk_thread_flag(tsk, TIF_DEBUG); 113 clear_tsk_thread_flag(tsk, TIF_DEBUG);
110 114
111 tsk->thread.debugreg0 = 0; 115 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
112 tsk->thread.debugreg1 = 0; 116 flush_thread_hw_breakpoint(tsk);
113 tsk->thread.debugreg2 = 0;
114 tsk->thread.debugreg3 = 0;
115 tsk->thread.debugreg6 = 0;
116 tsk->thread.debugreg7 = 0;
117 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 117 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
118 /* 118 /*
119 * Forget coprocessor state.. 119 * Forget coprocessor state..
@@ -195,16 +195,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
195 else if (next->debugctlmsr != prev->debugctlmsr) 195 else if (next->debugctlmsr != prev->debugctlmsr)
196 update_debugctlmsr(next->debugctlmsr); 196 update_debugctlmsr(next->debugctlmsr);
197 197
198 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
199 set_debugreg(next->debugreg0, 0);
200 set_debugreg(next->debugreg1, 1);
201 set_debugreg(next->debugreg2, 2);
202 set_debugreg(next->debugreg3, 3);
203 /* no 4 and 5 */
204 set_debugreg(next->debugreg6, 6);
205 set_debugreg(next->debugreg7, 7);
206 }
207
208 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 198 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
209 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 199 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
210 /* prev and next are different */ 200 /* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984a..00a8fe4c58b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
58#include <asm/idle.h> 58#include <asm/idle.h>
59#include <asm/syscalls.h> 59#include <asm/syscalls.h>
60#include <asm/ds.h> 60#include <asm/ds.h>
61#include <asm/debugreg.h>
62#include <asm/hw_breakpoint.h>
61 63
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 64asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 65
@@ -262,7 +264,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
262 264
263 task_user_gs(p) = get_user_gs(regs); 265 task_user_gs(p) = get_user_gs(regs);
264 266
267 p->thread.io_bitmap_ptr = NULL;
265 tsk = current; 268 tsk = current;
269 err = -ENOMEM;
270 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
271 if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
272 goto out;
273
266 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 274 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
267 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, 275 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
268 IO_BITMAP_BYTES, GFP_KERNEL); 276 IO_BITMAP_BYTES, GFP_KERNEL);
@@ -282,10 +290,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
282 err = do_set_thread_area(p, -1, 290 err = do_set_thread_area(p, -1,
283 (struct user_desc __user *)childregs->si, 0); 291 (struct user_desc __user *)childregs->si, 0);
284 292
293out:
285 if (err && p->thread.io_bitmap_ptr) { 294 if (err && p->thread.io_bitmap_ptr) {
286 kfree(p->thread.io_bitmap_ptr); 295 kfree(p->thread.io_bitmap_ptr);
287 p->thread.io_bitmap_max = 0; 296 p->thread.io_bitmap_max = 0;
288 } 297 }
298 if (err)
299 flush_thread_hw_breakpoint(p);
289 300
290 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); 301 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
291 p->thread.ds_ctx = NULL; 302 p->thread.ds_ctx = NULL;
@@ -424,6 +435,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
424 lazy_load_gs(next->gs); 435 lazy_load_gs(next->gs);
425 436
426 percpu_write(current_task, next_p); 437 percpu_write(current_task, next_p);
438 /*
439 * There's a problem with moving the arch_install_thread_hw_breakpoint()
440 * call before current is updated. Suppose a kernel breakpoint is
441 * triggered in between the two, the hw-breakpoint handler will see that
442 * the 'current' task does not have TIF_DEBUG flag set and will think it
443 * is leftover from an old task (lazy switching) and will erase it. Then
444 * until the next context switch, no user-breakpoints will be installed.
445 *
446 * The real problem is that it's impossible to update both current and
447 * physical debug registers at the same instant, so there will always be
448 * a window in which they disagree and a breakpoint might get triggered.
449 * Since we use lazy switching, we are forced to assume that a
450 * disagreement means that current is correct and the exception is due
451 * to lazy debug register switching.
452 */
453 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
454 arch_install_thread_hw_breakpoint(next_p);
427 455
428 return prev_p; 456 return prev_p;
429} 457}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9..89c46f1259d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
52#include <asm/idle.h> 52#include <asm/idle.h>
53#include <asm/syscalls.h> 53#include <asm/syscalls.h>
54#include <asm/ds.h> 54#include <asm/ds.h>
55#include <asm/debugreg.h>
56#include <asm/hw_breakpoint.h>
55 57
56asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
57 59
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *dead_task)
245 BUG(); 247 BUG();
246 } 248 }
247 } 249 }
250 if (unlikely(dead_task->thread.debugreg7))
251 flush_thread_hw_breakpoint(dead_task);
248} 252}
249 253
250static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 254static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
300 304
301 p->thread.fs = me->thread.fs; 305 p->thread.fs = me->thread.fs;
302 p->thread.gs = me->thread.gs; 306 p->thread.gs = me->thread.gs;
307 p->thread.io_bitmap_ptr = NULL;
303 308
304 savesegment(gs, p->thread.gsindex); 309 savesegment(gs, p->thread.gsindex);
305 savesegment(fs, p->thread.fsindex); 310 savesegment(fs, p->thread.fsindex);
306 savesegment(es, p->thread.es); 311 savesegment(es, p->thread.es);
307 savesegment(ds, p->thread.ds); 312 savesegment(ds, p->thread.ds);
308 313
314 err = -ENOMEM;
315 if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
316 if (copy_thread_hw_breakpoint(me, p, clone_flags))
317 goto out;
318
309 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 319 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
310 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 320 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
311 if (!p->thread.io_bitmap_ptr) { 321 if (!p->thread.io_bitmap_ptr) {
@@ -344,6 +354,9 @@ out:
344 kfree(p->thread.io_bitmap_ptr); 354 kfree(p->thread.io_bitmap_ptr);
345 p->thread.io_bitmap_max = 0; 355 p->thread.io_bitmap_max = 0;
346 } 356 }
357 if (err)
358 flush_thread_hw_breakpoint(p);
359
347 return err; 360 return err;
348} 361}
349 362
@@ -489,6 +502,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
489 */ 502 */
490 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 503 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
491 math_state_restore(); 504 math_state_restore();
505 /*
506 * There's a problem with moving the arch_install_thread_hw_breakpoint()
507 * call before current is updated. Suppose a kernel breakpoint is
508 * triggered in between the two, the hw-breakpoint handler will see that
509 * the 'current' task does not have TIF_DEBUG flag set and will think it
510 * is leftover from an old task (lazy switching) and will erase it. Then
511 * until the next context switch, no user-breakpoints will be installed.
512 *
513 * The real problem is that it's impossible to update both current and
514 * physical debug registers at the same instant, so there will always be
515 * a window in which they disagree and a breakpoint might get triggered.
516 * Since we use lazy switching, we are forced to assume that a
517 * disagreement means that current is correct and the exception is due
518 * to lazy debug register switching.
519 */
520 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
521 arch_install_thread_hw_breakpoint(next_p);
522
492 return prev_p; 523 return prev_p;
493} 524}
494 525
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8d7d5c9c1be..113b8927c82 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -34,6 +34,7 @@
34#include <asm/prctl.h> 34#include <asm/prctl.h>
35#include <asm/proto.h> 35#include <asm/proto.h>
36#include <asm/ds.h> 36#include <asm/ds.h>
37#include <asm/hw_breakpoint.h>
37 38
38#include "tls.h" 39#include "tls.h"
39 40
@@ -137,11 +138,6 @@ static int set_segment_reg(struct task_struct *task,
137 return 0; 138 return 0;
138} 139}
139 140
140static unsigned long debugreg_addr_limit(struct task_struct *task)
141{
142 return TASK_SIZE - 3;
143}
144
145#else /* CONFIG_X86_64 */ 141#else /* CONFIG_X86_64 */
146 142
147#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 143#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +262,6 @@ static int set_segment_reg(struct task_struct *task,
266 return 0; 262 return 0;
267} 263}
268 264
269static unsigned long debugreg_addr_limit(struct task_struct *task)
270{
271#ifdef CONFIG_IA32_EMULATION
272 if (test_tsk_thread_flag(task, TIF_IA32))
273 return IA32_PAGE_OFFSET - 3;
274#endif
275 return TASK_SIZE_MAX - 7;
276}
277
278#endif /* CONFIG_X86_32 */ 265#endif /* CONFIG_X86_32 */
279 266
280static unsigned long get_flags(struct task_struct *task) 267static unsigned long get_flags(struct task_struct *task)
@@ -465,95 +452,159 @@ static int genregs_set(struct task_struct *target,
465} 452}
466 453
467/* 454/*
468 * This function is trivial and will be inlined by the compiler. 455 * Decode the length and type bits for a particular breakpoint as
469 * Having it separates the implementation details of debug 456 * stored in debug register 7. Return the "enabled" status.
470 * registers from the interface details of ptrace.
471 */ 457 */
472static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) 458static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
459 unsigned *type)
473{ 460{
474 switch (n) { 461 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
475 case 0: return child->thread.debugreg0; 462
476 case 1: return child->thread.debugreg1; 463 *len = (bp_info & 0xc) | 0x40;
477 case 2: return child->thread.debugreg2; 464 *type = (bp_info & 0x3) | 0x80;
478 case 3: return child->thread.debugreg3; 465 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
479 case 6: return child->thread.debugreg6;
480 case 7: return child->thread.debugreg7;
481 }
482 return 0;
483} 466}
484 467
485static int ptrace_set_debugreg(struct task_struct *child, 468static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
486 int n, unsigned long data)
487{ 469{
470 struct thread_struct *thread = &(current->thread);
488 int i; 471 int i;
489 472
490 if (unlikely(n == 4 || n == 5)) 473 /*
491 return -EIO; 474 * Store in the virtual DR6 register the fact that the breakpoint
475 * was hit so the thread's debugger will see it.
476 */
477 for (i = 0; i < hbp_kernel_pos; i++)
478 /*
479 * We will check bp->info.address against the address stored in
480 * thread's hbp structure and not debugreg[i]. This is to ensure
481 * that the corresponding bit for 'i' in DR7 register is enabled
482 */
483 if (bp->info.address == thread->hbp[i]->info.address)
484 break;
492 485
493 if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) 486 thread->debugreg6 |= (DR_TRAP0 << i);
494 return -EIO; 487}
495 488
496 switch (n) { 489/*
497 case 0: child->thread.debugreg0 = data; break; 490 * Handle ptrace writes to debug register 7.
498 case 1: child->thread.debugreg1 = data; break; 491 */
499 case 2: child->thread.debugreg2 = data; break; 492static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
500 case 3: child->thread.debugreg3 = data; break; 493{
494 struct thread_struct *thread = &(tsk->thread);
495 unsigned long old_dr7 = thread->debugreg7;
496 int i, orig_ret = 0, rc = 0;
497 int enabled, second_pass = 0;
498 unsigned len, type;
499 struct hw_breakpoint *bp;
500
501 data &= ~DR_CONTROL_RESERVED;
502restore:
503 /*
504 * Loop through all the hardware breakpoints, making the
505 * appropriate changes to each.
506 */
507 for (i = 0; i < HBP_NUM; i++) {
508 enabled = decode_dr7(data, i, &len, &type);
509 bp = thread->hbp[i];
510
511 if (!enabled) {
512 if (bp) {
513 /* Don't unregister the breakpoints right away,
514 * until all register_user_hw_breakpoint()
515 * requests have succeeded. This prevents
516 * any window of opportunity for debug
517 * register grabbing by other users.
518 */
519 if (!second_pass)
520 continue;
521 unregister_user_hw_breakpoint(tsk, bp);
522 kfree(bp);
523 }
524 continue;
525 }
526 if (!bp) {
527 rc = -ENOMEM;
528 bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
529 if (bp) {
530 bp->info.address = thread->debugreg[i];
531 bp->triggered = ptrace_triggered;
532 bp->info.len = len;
533 bp->info.type = type;
534 rc = register_user_hw_breakpoint(tsk, bp);
535 if (rc)
536 kfree(bp);
537 }
538 } else
539 rc = modify_user_hw_breakpoint(tsk, bp);
540 if (rc)
541 break;
542 }
543 /*
544 * Make a second pass to free the remaining unused breakpoints
545 * or to restore the original breakpoints if an error occurred.
546 */
547 if (!second_pass) {
548 second_pass = 1;
549 if (rc < 0) {
550 orig_ret = rc;
551 data = old_dr7;
552 }
553 goto restore;
554 }
555 return ((orig_ret < 0) ? orig_ret : rc);
556}
501 557
502 case 6: 558/*
503 if ((data & ~0xffffffffUL) != 0) 559 * Handle PTRACE_PEEKUSR calls for the debug register area.
504 return -EIO; 560 */
505 child->thread.debugreg6 = data; 561static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
506 break; 562{
563 struct thread_struct *thread = &(tsk->thread);
564 unsigned long val = 0;
565
566 if (n < HBP_NUM)
567 val = thread->debugreg[n];
568 else if (n == 6)
569 val = thread->debugreg6;
570 else if (n == 7)
571 val = thread->debugreg7;
572 return val;
573}
507 574
508 case 7: 575/*
509 /* 576 * Handle PTRACE_POKEUSR calls for the debug register area.
510 * Sanity-check data. Take one half-byte at once with 577 */
511 * check = (val >> (16 + 4*i)) & 0xf. It contains the 578int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
512 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits 579{
513 * 2 and 3 are LENi. Given a list of invalid values, 580 struct thread_struct *thread = &(tsk->thread);
514 * we do mask |= 1 << invalid_value, so that 581 int rc = 0;
515 * (mask >> check) & 1 is a correct test for invalid 582
516 * values. 583 /* There are no DR4 or DR5 registers */
517 * 584 if (n == 4 || n == 5)
518 * R/Wi contains the type of the breakpoint / 585 return -EIO;
519 * watchpoint, LENi contains the length of the watched 586
520 * data in the watchpoint case. 587 if (n == 6) {
521 * 588 tsk->thread.debugreg6 = val;
522 * The invalid values are: 589 goto ret_path;
523 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
524 * - R/Wi == 0x10 (break on I/O reads or writes), so
525 * mask |= 0x4444.
526 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
527 * 0x1110.
528 *
529 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
530 *
531 * See the Intel Manual "System Programming Guide",
532 * 15.2.4
533 *
534 * Note that LENi == 0x10 is defined on x86_64 in long
535 * mode (i.e. even for 32-bit userspace software, but
536 * 64-bit kernel), so the x86_64 mask value is 0x5454.
537 * See the AMD manual no. 24593 (AMD64 System Programming)
538 */
539#ifdef CONFIG_X86_32
540#define DR7_MASK 0x5f54
541#else
542#define DR7_MASK 0x5554
543#endif
544 data &= ~DR_CONTROL_RESERVED;
545 for (i = 0; i < 4; i++)
546 if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
547 return -EIO;
548 child->thread.debugreg7 = data;
549 if (data)
550 set_tsk_thread_flag(child, TIF_DEBUG);
551 else
552 clear_tsk_thread_flag(child, TIF_DEBUG);
553 break;
554 } 590 }
591 if (n < HBP_NUM) {
592 if (thread->hbp[n]) {
593 if (arch_check_va_in_userspace(val,
594 thread->hbp[n]->info.len) == 0) {
595 rc = -EIO;
596 goto ret_path;
597 }
598 thread->hbp[n]->info.address = val;
599 }
600 thread->debugreg[n] = val;
601 }
602 /* All that's left is DR7 */
603 if (n == 7)
604 rc = ptrace_write_dr7(tsk, val);
555 605
556 return 0; 606ret_path:
607 return rc;
557} 608}
558 609
559/* 610/*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c578751e94..0f89a4f20db 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
799 799
800 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 800 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
801 if (signr > 0) { 801 if (signr > 0) {
802 /*
803 * Re-enable any watchpoints before delivering the
804 * signal to user space. The processor register will
805 * have been cleared if the watchpoint triggered
806 * inside the kernel.
807 */
808 if (current->thread.debugreg7)
809 set_debugreg(current->thread.debugreg7, 7);
810
811 /* Whee! Actually deliver the signal. */ 802 /* Whee! Actually deliver the signal. */
812 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { 803 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
813 /* 804 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2fecda69ee6..dee0f3d814a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -63,6 +63,7 @@
63#include <asm/apic.h> 63#include <asm/apic.h>
64#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h> 65#include <asm/uv/uv.h>
66#include <asm/debugreg.h>
66#include <linux/mc146818rtc.h> 67#include <linux/mc146818rtc.h>
67 68
68#include <asm/smpboot_hooks.h> 69#include <asm/smpboot_hooks.h>
@@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused)
326 setup_secondary_clock(); 327 setup_secondary_clock();
327 328
328 wmb(); 329 wmb();
330 load_debug_registers();
329 cpu_idle(); 331 cpu_idle();
330} 332}
331 333
@@ -1254,6 +1256,7 @@ void cpu_disable_common(void)
1254 remove_cpu_from_maps(cpu); 1256 remove_cpu_from_maps(cpu);
1255 unlock_vector_lock(); 1257 unlock_vector_lock();
1256 fixup_irqs(); 1258 fixup_irqs();
1259 hw_breakpoint_disable();
1257} 1260}
1258 1261
1259int native_cpu_disable(void) 1262int native_cpu_disable(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5204332f475..ae04589a579 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -532,77 +532,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
532dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) 532dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
533{ 533{
534 struct task_struct *tsk = current; 534 struct task_struct *tsk = current;
535 unsigned long condition; 535 unsigned long dr6;
536 int si_code; 536 int si_code;
537 537
538 get_debugreg(condition, 6); 538 get_debugreg(dr6, 6);
539 539
540 /* Catch kmemcheck conditions first of all! */ 540 /* Catch kmemcheck conditions first of all! */
541 if (condition & DR_STEP && kmemcheck_trap(regs)) 541 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
542 return; 542 return;
543 543
544 /* DR6 may or may not be cleared by the CPU */
545 set_debugreg(0, 6);
544 /* 546 /*
545 * The processor cleared BTF, so don't mark that we need it set. 547 * The processor cleared BTF, so don't mark that we need it set.
546 */ 548 */
547 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); 549 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
548 tsk->thread.debugctlmsr = 0; 550 tsk->thread.debugctlmsr = 0;
549 551
550 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 552 /* Store the virtualized DR6 value */
551 SIGTRAP) == NOTIFY_STOP) 553 tsk->thread.debugreg6 = dr6;
554
555 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
556 SIGTRAP) == NOTIFY_STOP)
552 return; 557 return;
553 558
554 /* It's safe to allow irq's after DR6 has been saved */ 559 /* It's safe to allow irq's after DR6 has been saved */
555 preempt_conditional_sti(regs); 560 preempt_conditional_sti(regs);
556 561
557 /* Mask out spurious debug traps due to lazy DR7 setting */ 562 if (regs->flags & X86_VM_MASK) {
558 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 563 handle_vm86_trap((struct kernel_vm86_regs *) regs,
559 if (!tsk->thread.debugreg7) 564 error_code, 1);
560 goto clear_dr7; 565 return;
561 } 566 }
562 567
563#ifdef CONFIG_X86_32
564 if (regs->flags & X86_VM_MASK)
565 goto debug_vm86;
566#endif
567
568 /* Save debug status register where ptrace can see it */
569 tsk->thread.debugreg6 = condition;
570
571 /* 568 /*
572 * Single-stepping through TF: make sure we ignore any events in 569 * Single-stepping through system calls: ignore any exceptions in
573 * kernel space (but re-enable TF when returning to user mode). 570 * kernel space, but re-enable TF when returning to user mode.
571 *
572 * We already checked v86 mode above, so we can check for kernel mode
573 * by just checking the CPL of CS.
574 */ 574 */
575 if (condition & DR_STEP) { 575 if ((dr6 & DR_STEP) && !user_mode(regs)) {
576 if (!user_mode(regs)) 576 tsk->thread.debugreg6 &= ~DR_STEP;
577 goto clear_TF_reenable; 577 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
578 regs->flags &= ~X86_EFLAGS_TF;
578 } 579 }
579 580 si_code = get_si_code(tsk->thread.debugreg6);
580 si_code = get_si_code(condition); 581 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
581 /* Ok, finally something we can handle */ 582 send_sigtrap(tsk, regs, error_code, si_code);
582 send_sigtrap(tsk, regs, error_code, si_code);
583
584 /*
585 * Disable additional traps. They'll be re-enabled when
586 * the signal is delivered.
587 */
588clear_dr7:
589 set_debugreg(0, 7);
590 preempt_conditional_cli(regs); 583 preempt_conditional_cli(regs);
591 return;
592 584
593#ifdef CONFIG_X86_32
594debug_vm86:
595 /* reenable preemption: handle_vm86_trap() might sleep */
596 dec_preempt_count();
597 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
598 conditional_cli(regs);
599 return;
600#endif
601
602clear_TF_reenable:
603 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
604 regs->flags &= ~X86_EFLAGS_TF;
605 preempt_conditional_cli(regs);
606 return; 585 return;
607} 586}
608 587
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917..11a4ad4d625 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
540 struct die_args *arg = args; 540 struct die_args *arg = args;
541 541
542 if (val == DIE_DEBUG && (arg->err & DR_STEP)) 542 if (val == DIE_DEBUG && (arg->err & DR_STEP))
543 if (post_kmmio_handler(arg->err, arg->regs) == 1) 543 if (post_kmmio_handler(arg->err, arg->regs) == 1) {
544 /*
545 * Reset the BS bit in dr6 (pointed to by arg->err) to
546 * denote completion of processing
547 */
548 (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
544 return NOTIFY_STOP; 549 return NOTIFY_STOP;
550 }
545 551
546 return NOTIFY_DONE; 552 return NOTIFY_DONE;
547} 553}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b3d20b9cac6..9e63db8cdee 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
18#include <asm/mce.h> 18#include <asm/mce.h>
19#include <asm/xcr.h> 19#include <asm/xcr.h>
20#include <asm/suspend.h> 20#include <asm/suspend.h>
21#include <asm/debugreg.h>
21 22
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23static struct saved_context saved_context; 24static struct saved_context saved_context;
@@ -104,6 +105,7 @@ static void __save_processor_state(struct saved_context *ctxt)
104 ctxt->cr4 = read_cr4(); 105 ctxt->cr4 = read_cr4();
105 ctxt->cr8 = read_cr8(); 106 ctxt->cr8 = read_cr8();
106#endif 107#endif
108 hw_breakpoint_disable();
107} 109}
108 110
109/* Needed by apm.c */ 111/* Needed by apm.c */
@@ -146,27 +148,7 @@ static void fix_processor_context(void)
146 /* 148 /*
147 * Now maybe reload the debug registers 149 * Now maybe reload the debug registers
148 */ 150 */
149 if (current->thread.debugreg7) { 151 load_debug_registers();
150#ifdef CONFIG_X86_32
151 set_debugreg(current->thread.debugreg0, 0);
152 set_debugreg(current->thread.debugreg1, 1);
153 set_debugreg(current->thread.debugreg2, 2);
154 set_debugreg(current->thread.debugreg3, 3);
155 /* no 4 and 5 */
156 set_debugreg(current->thread.debugreg6, 6);
157 set_debugreg(current->thread.debugreg7, 7);
158#else
159 /* CONFIG_X86_64 */
160 loaddebug(&current->thread, 0);
161 loaddebug(&current->thread, 1);
162 loaddebug(&current->thread, 2);
163 loaddebug(&current->thread, 3);
164 /* no 4 and 5 */
165 loaddebug(&current->thread, 6);
166 loaddebug(&current->thread, 7);
167#endif
168 }
169
170} 152}
171 153
172/** 154/**
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h
new file mode 100644
index 00000000000..9bf2d12eb74
--- /dev/null
+++ b/include/asm-generic/hw_breakpoint.h
@@ -0,0 +1,139 @@
1#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
2#define _ASM_GENERIC_HW_BREAKPOINT_H
3
4#ifndef __ARCH_HW_BREAKPOINT_H
5#error "Please don't include this file directly"
6#endif
7
8#ifdef __KERNEL__
9#include <linux/list.h>
10#include <linux/types.h>
11#include <linux/kallsyms.h>
12
13/**
14 * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
15 * @triggered: callback invoked after target address access
16 * @info: arch-specific breakpoint info (address, length, and type)
17 *
18 * %hw_breakpoint structures are the kernel's way of representing
19 * hardware breakpoints. These are data breakpoints
20 * (also known as "watchpoints", triggered on data access), and the breakpoint's
21 * target address can be located in either kernel space or user space.
22 *
23 * The breakpoint's address, length, and type are highly
24 * architecture-specific. The values are encoded in the @info field; you
25 * specify them when registering the breakpoint. To examine the encoded
26 * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
27 * below.
28 *
29 * The address is specified as a regular kernel pointer (for kernel-space
30 * breakpoints) or as an %__user pointer (for user-space breakpoints).
31 * With register_user_hw_breakpoint(), the address must refer to a
32 * location in user space. The breakpoint will be active only while the
33 * requested task is running. Conversely with
34 * register_kernel_hw_breakpoint(), the address must refer to a location
35 * in kernel space, and the breakpoint will be active on all CPUs
36 * regardless of the current task.
37 *
38 * The length is the breakpoint's extent in bytes, which is subject to
39 * certain limitations. include/asm/hw_breakpoint.h contains macros
40 * defining the available lengths for a specific architecture. Note that
41 * the address's alignment must match the length. The breakpoint will
42 * catch accesses to any byte in the range from address to address +
43 * (length - 1).
44 *
45 * The breakpoint's type indicates the sort of access that will cause it
46 * to trigger. Possible values may include:
47 *
48 * %HW_BREAKPOINT_RW (triggered on read or write access),
49 * %HW_BREAKPOINT_WRITE (triggered on write access), and
50 * %HW_BREAKPOINT_READ (triggered on read access).
51 *
52 * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
53 * possibilities are available on all architectures. Execute breakpoints
54 * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
55 *
56 * When a breakpoint gets hit, the @triggered callback is
57 * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
58 * processor registers.
59 * Data breakpoints occur after the memory access has taken place.
60 * Breakpoints are disabled during execution of @triggered, to avoid
61 * recursive traps and allow unhindered access to breakpointed memory.
62 *
63 * This sample code sets a breakpoint on pid_max and registers a callback
64 * function for writes to that variable. Note that it is not portable
65 * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
66 *
67 * ----------------------------------------------------------------------
68 *
69 * #include <asm/hw_breakpoint.h>
70 *
71 * struct hw_breakpoint my_bp;
72 *
73 * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
74 * {
75 * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
76 * dump_stack();
77 * .......<more debugging output>........
78 * }
79 *
80 * static struct hw_breakpoint my_bp;
81 *
82 * static int init_module(void)
83 * {
84 * ..........<do anything>............
85 * my_bp.info.type = HW_BREAKPOINT_WRITE;
86 * my_bp.info.len = HW_BREAKPOINT_LEN_4;
87 *
88 * my_bp.triggered = my_triggered;
89 *
90 * rc = register_kernel_hw_breakpoint(&my_bp);
91 * ..........<do anything>............
92 * }
93 *
94 * static void cleanup_module(void)
95 * {
96 * ..........<do anything>............
97 * unregister_kernel_hw_breakpoint(&my_bp);
98 * ..........<do anything>............
99 * }
100 *
101 * ----------------------------------------------------------------------
102 */
103struct hw_breakpoint {
104 void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
105 struct arch_hw_breakpoint info;
106};
107
108/*
109 * len and type values are defined in include/asm/hw_breakpoint.h.
110 * Available values vary according to the architecture. On i386 the
111 * possibilities are:
112 *
113 * HW_BREAKPOINT_LEN_1
114 * HW_BREAKPOINT_LEN_2
115 * HW_BREAKPOINT_LEN_4
116 * HW_BREAKPOINT_RW
117 * HW_BREAKPOINT_READ
118 *
119 * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
120 * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
121 * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
122 */
123
124extern int register_user_hw_breakpoint(struct task_struct *tsk,
125 struct hw_breakpoint *bp);
126extern int modify_user_hw_breakpoint(struct task_struct *tsk,
127 struct hw_breakpoint *bp);
128extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
129 struct hw_breakpoint *bp);
130/*
131 * Kernel breakpoints are not associated with any particular thread.
132 */
133extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
134extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
135
136extern unsigned int hbp_kernel_pos;
137
138#endif /* __KERNEL__ */
139#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 2093a691f1c..52508612a08 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_X86_DS) += trace/
99obj-$(CONFIG_RING_BUFFER) += trace/ 99obj-$(CONFIG_RING_BUFFER) += trace/
100obj-$(CONFIG_SMP) += sched_cpupri.o 100obj-$(CONFIG_SMP) += sched_cpupri.o
101obj-$(CONFIG_SLOW_WORK) += slow-work.o 101obj-$(CONFIG_SLOW_WORK) += slow-work.o
102obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
102obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o 103obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
103 104
104ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 105ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 00000000000..c1f64e65a9f
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,378 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 */
19
20/*
21 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
22 * using the CPU's debug registers.
23 * This file contains the arch-independent routines.
24 */
25
26#include <linux/irqflags.h>
27#include <linux/kallsyms.h>
28#include <linux/notifier.h>
29#include <linux/kprobes.h>
30#include <linux/kdebug.h>
31#include <linux/kernel.h>
32#include <linux/module.h>
33#include <linux/percpu.h>
34#include <linux/sched.h>
35#include <linux/init.h>
36#include <linux/smp.h>
37
38#include <asm/hw_breakpoint.h>
39#include <asm/processor.h>
40
41#ifdef CONFIG_X86
42#include <asm/debugreg.h>
43#endif
44/*
45 * Spinlock that protects all (un)register operations over kernel/user-space
46 * breakpoint requests
47 */
48static DEFINE_SPINLOCK(hw_breakpoint_lock);
49
50/* Array of kernel-space breakpoint structures */
51struct hw_breakpoint *hbp_kernel[HBP_NUM];
52
53/*
54 * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
55 * modified but we need the older copy to handle any hbp exceptions. It will
56 * sync with hbp_kernel[] value once the update has been done through IPIs.
57 */
58DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
59
60/*
61 * Kernel breakpoints grow downwards, starting from HBP_NUM.
62 * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
63 * kernel-space request. We will initialise it here and not in an __init
64 * routine because load_debug_registers(), which uses this variable can be
65 * called very early during CPU initialisation.
66 */
67unsigned int hbp_kernel_pos = HBP_NUM;
68
69/*
70 * An array containing the refcount of threads using a given breakpoint register.
71 * Accesses are synchronised by acquiring hw_breakpoint_lock
72 */
73unsigned int hbp_user_refcount[HBP_NUM];
74
75/*
76 * Load the debug registers during startup of a CPU.
77 */
78void load_debug_registers(void)
79{
80 unsigned long flags;
81 struct task_struct *tsk = current;
82
83 spin_lock_bh(&hw_breakpoint_lock);
84
85 /* Prevent IPIs for new kernel breakpoint updates */
86 local_irq_save(flags);
87 arch_update_kernel_hw_breakpoint(NULL);
88 local_irq_restore(flags);
89
90 if (test_tsk_thread_flag(tsk, TIF_DEBUG))
91 arch_install_thread_hw_breakpoint(tsk);
92
93 spin_unlock_bh(&hw_breakpoint_lock);
94}
95
96/*
97 * Erase all the hardware breakpoint info associated with a thread.
98 *
99 * If tsk != current then tsk must not be usable (for example, a
100 * child being cleaned up from a failed fork).
101 */
102void flush_thread_hw_breakpoint(struct task_struct *tsk)
103{
104 int i;
105 struct thread_struct *thread = &(tsk->thread);
106
107 spin_lock_bh(&hw_breakpoint_lock);
108
109 /* The thread no longer has any breakpoints associated with it */
110 clear_tsk_thread_flag(tsk, TIF_DEBUG);
111 for (i = 0; i < HBP_NUM; i++) {
112 if (thread->hbp[i]) {
113 hbp_user_refcount[i]--;
114 kfree(thread->hbp[i]);
115 thread->hbp[i] = NULL;
116 }
117 }
118
119 arch_flush_thread_hw_breakpoint(tsk);
120
121 /* Actually uninstall the breakpoints if necessary */
122 if (tsk == current)
123 arch_uninstall_thread_hw_breakpoint();
124 spin_unlock_bh(&hw_breakpoint_lock);
125}
126
127/*
128 * Copy the hardware breakpoint info from a thread to its cloned child.
129 */
130int copy_thread_hw_breakpoint(struct task_struct *tsk,
131 struct task_struct *child, unsigned long clone_flags)
132{
133 /*
134 * We will assume that breakpoint settings are not inherited
135 * and the child starts out with no debug registers set.
136 * But what about CLONE_PTRACE?
137 */
138 clear_tsk_thread_flag(child, TIF_DEBUG);
139
140 /* We will call flush routine since the debugregs are not inherited */
141 arch_flush_thread_hw_breakpoint(child);
142
143 return 0;
144}
145
146static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
147 struct hw_breakpoint *bp)
148{
149 struct thread_struct *thread = &(tsk->thread);
150 int rc;
151
152 /* Do not overcommit. Fail if kernel has used the hbp registers */
153 if (pos >= hbp_kernel_pos)
154 return -ENOSPC;
155
156 rc = arch_validate_hwbkpt_settings(bp, tsk);
157 if (rc)
158 return rc;
159
160 thread->hbp[pos] = bp;
161 hbp_user_refcount[pos]++;
162
163 arch_update_user_hw_breakpoint(pos, tsk);
164 /*
165 * Does it need to be installed right now?
166 * Otherwise it will get installed the next time tsk runs
167 */
168 if (tsk == current)
169 arch_install_thread_hw_breakpoint(tsk);
170
171 return rc;
172}
173
174/*
175 * Modify the address of a hbp register already in use by the task
176 * Do not invoke this in lieu of a __unregister_user_hw_breakpoint()
177 */
178static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
179 struct hw_breakpoint *bp)
180{
181 struct thread_struct *thread = &(tsk->thread);
182
183 if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
184 return -EINVAL;
185
186 if (thread->hbp[pos] == NULL)
187 return -EINVAL;
188
189 thread->hbp[pos] = bp;
190 /*
191 * 'pos' must be that of a hbp register already used by 'tsk'
192 * Otherwise arch_update_user_hw_breakpoint() will fail
193 */
194 arch_update_user_hw_breakpoint(pos, tsk);
195
196 if (tsk == current)
197 arch_install_thread_hw_breakpoint(tsk);
198
199 return 0;
200}
201
202static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
203{
204 hbp_user_refcount[pos]--;
205 tsk->thread.hbp[pos] = NULL;
206
207 arch_update_user_hw_breakpoint(pos, tsk);
208
209 if (tsk == current)
210 arch_install_thread_hw_breakpoint(tsk);
211}
212
213/**
214 * register_user_hw_breakpoint - register a hardware breakpoint for user space
215 * @tsk: pointer to 'task_struct' of the process to which the address belongs
216 * @bp: the breakpoint structure to register
217 *
218 * @bp->info.name or @bp->info.address, @bp->info.len, @bp->info.type and
219 * @bp->triggered must be set properly before invocation
220 *
221 */
222int register_user_hw_breakpoint(struct task_struct *tsk,
223 struct hw_breakpoint *bp)
224{
225 struct thread_struct *thread = &(tsk->thread);
226 int i, rc = -ENOSPC;
227
228 spin_lock_bh(&hw_breakpoint_lock);
229
230 for (i = 0; i < hbp_kernel_pos; i++) {
231 if (!thread->hbp[i]) {
232 rc = __register_user_hw_breakpoint(i, tsk, bp);
233 break;
234 }
235 }
236 if (!rc)
237 set_tsk_thread_flag(tsk, TIF_DEBUG);
238
239 spin_unlock_bh(&hw_breakpoint_lock);
240 return rc;
241}
242EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
243
244/**
245 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
246 * @tsk: pointer to 'task_struct' of the process to which the address belongs
247 * @bp: the breakpoint structure to unregister
248 *
249 */
250int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp)
251{
252 struct thread_struct *thread = &(tsk->thread);
253 int i, ret = -ENOENT;
254
255 spin_lock_bh(&hw_breakpoint_lock);
256 for (i = 0; i < hbp_kernel_pos; i++) {
257 if (bp == thread->hbp[i]) {
258 ret = __modify_user_hw_breakpoint(i, tsk, bp);
259 break;
260 }
261 }
262 spin_unlock_bh(&hw_breakpoint_lock);
263 return ret;
264}
265EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
266
267/**
268 * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
269 * @tsk: pointer to 'task_struct' of the process to which the address belongs
270 * @bp: the breakpoint structure to unregister
271 *
272 */
273void unregister_user_hw_breakpoint(struct task_struct *tsk,
274 struct hw_breakpoint *bp)
275{
276 struct thread_struct *thread = &(tsk->thread);
277 int i, pos = -1, hbp_counter = 0;
278
279 spin_lock_bh(&hw_breakpoint_lock);
280 for (i = 0; i < hbp_kernel_pos; i++) {
281 if (thread->hbp[i])
282 hbp_counter++;
283 if (bp == thread->hbp[i])
284 pos = i;
285 }
286 if (pos >= 0) {
287 __unregister_user_hw_breakpoint(pos, tsk);
288 hbp_counter--;
289 }
290 if (!hbp_counter)
291 clear_tsk_thread_flag(tsk, TIF_DEBUG);
292
293 spin_unlock_bh(&hw_breakpoint_lock);
294}
295EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
296
297/**
298 * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
299 * @bp: the breakpoint structure to register
300 *
301 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
302 * @bp->triggered must be set properly before invocation
303 *
304 */
305int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
306{
307 int rc;
308
309 rc = arch_validate_hwbkpt_settings(bp, NULL);
310 if (rc)
311 return rc;
312
313 spin_lock_bh(&hw_breakpoint_lock);
314
315 rc = -ENOSPC;
316 /* Check if we are over-committing */
317 if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
318 hbp_kernel_pos--;
319 hbp_kernel[hbp_kernel_pos] = bp;
320 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
321 rc = 0;
322 }
323
324 spin_unlock_bh(&hw_breakpoint_lock);
325 return rc;
326}
327EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
328
329/**
330 * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
331 * @bp: the breakpoint structure to unregister
332 *
333 * Uninstalls and unregisters @bp.
334 */
335void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
336{
337 int i, j;
338
339 spin_lock_bh(&hw_breakpoint_lock);
340
341 /* Find the 'bp' in our list of breakpoints for kernel */
342 for (i = hbp_kernel_pos; i < HBP_NUM; i++)
343 if (bp == hbp_kernel[i])
344 break;
345
346 /* Check if we did not find a match for 'bp'. If so return early */
347 if (i == HBP_NUM) {
348 spin_unlock_bh(&hw_breakpoint_lock);
349 return;
350 }
351
352 /*
353 * We'll shift the breakpoints one-level above to compact if
354 * unregistration creates a hole
355 */
356 for (j = i; j > hbp_kernel_pos; j--)
357 hbp_kernel[j] = hbp_kernel[j-1];
358
359 hbp_kernel[hbp_kernel_pos] = NULL;
360 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
361 hbp_kernel_pos++;
362
363 spin_unlock_bh(&hw_breakpoint_lock);
364}
365EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
366
367static struct notifier_block hw_breakpoint_exceptions_nb = {
368 .notifier_call = hw_breakpoint_exceptions_notify,
369 /* we need to be notified first */
370 .priority = 0x7fffffff
371};
372
373static int __init init_hw_breakpoint(void)
374{
375 return register_die_notifier(&hw_breakpoint_exceptions_nb);
376}
377
378core_initcall(init_hw_breakpoint);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1ea0d1234f4..5efeb4229ea 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -329,6 +329,27 @@ config POWER_TRACER
329 power management decisions, specifically the C-state and P-state 329 power management decisions, specifically the C-state and P-state
330 behavior. 330 behavior.
331 331
332config KSYM_TRACER
333 bool "Trace read and write access on kernel memory locations"
334 depends on HAVE_HW_BREAKPOINT
335 select TRACING
336 help
337 This tracer helps find read and write operations on any given kernel
338 symbol, i.e. any symbol listed in /proc/kallsyms.
339
340config PROFILE_KSYM_TRACER
341 bool "Profile all kernel memory accesses on 'watched' variables"
342 depends on KSYM_TRACER
343 help
344 This tracer profiles kernel accesses on variables watched through the
345 ksym tracer ftrace plugin. Depending upon the hardware, all read
346 and write operations on kernel variables can be monitored for
347 accesses.
348
349 The results will be displayed in:
350 /debugfs/tracing/profile_ksym
351
352 Say N if unsure.
332 353
333config STACK_TRACER 354config STACK_TRACER
334 bool "Trace max stack" 355 bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 844164dca90..ce3b1cd0273 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -54,5 +54,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
57 58
58libftrace-y := ftrace.o 59libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fa1dccb579d..ea7e0bcbd53 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,6 +15,10 @@
15#include <linux/trace_seq.h> 15#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 16#include <linux/ftrace_event.h>
17 17
18#ifdef CONFIG_KSYM_TRACER
19#include <asm/hw_breakpoint.h>
20#endif
21
18enum trace_type { 22enum trace_type {
19 __TRACE_FIRST_TYPE = 0, 23 __TRACE_FIRST_TYPE = 0,
20 24
@@ -38,6 +42,7 @@ enum trace_type {
38 TRACE_KMEM_FREE, 42 TRACE_KMEM_FREE,
39 TRACE_POWER, 43 TRACE_POWER,
40 TRACE_BLK, 44 TRACE_BLK,
45 TRACE_KSYM,
41 46
42 __TRACE_LAST_TYPE, 47 __TRACE_LAST_TYPE,
43}; 48};
@@ -205,6 +210,16 @@ struct syscall_trace_exit {
205 unsigned long ret; 210 unsigned long ret;
206}; 211};
207 212
213#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
214extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
215
216struct ksym_trace_entry { /* ring-buffer record written by ksym_hbp_handler() */
217 struct trace_entry ent; /* embedded struct trace_entry header */
218 unsigned long ip; /* instruction_pointer(regs) at the time of the hit */
219 unsigned char type; /* copy of hbp->info.type for the breakpoint that fired */
220 char ksym_name[KSYM_NAME_LEN]; /* copy of hbp->info.name */
221 char cmd[TASK_COMM_LEN]; /* copy of current->comm */
222};
208 223
209/* 224/*
210 * trace_flag_type is an enumeration that holds different 225 * trace_flag_type is an enumeration that holds different
@@ -315,6 +330,7 @@ extern void __ftrace_bad_type(void);
315 TRACE_KMEM_ALLOC); \ 330 TRACE_KMEM_ALLOC); \
316 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 331 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
317 TRACE_KMEM_FREE); \ 332 TRACE_KMEM_FREE); \
333 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
318 __ftrace_bad_type(); \ 334 __ftrace_bad_type(); \
319 } while (0) 335 } while (0)
320 336
@@ -558,6 +574,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
558 struct trace_array *tr); 574 struct trace_array *tr);
559extern int trace_selftest_startup_hw_branches(struct tracer *trace, 575extern int trace_selftest_startup_hw_branches(struct tracer *trace,
560 struct trace_array *tr); 576 struct trace_array *tr);
577extern int trace_selftest_startup_ksym(struct tracer *trace,
578 struct trace_array *tr);
561#endif /* CONFIG_FTRACE_STARTUP_TEST */ 579#endif /* CONFIG_FTRACE_STARTUP_TEST */
562 580
563extern void *head_page(struct trace_array_cpu *data); 581extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 00000000000..2fde875ead4
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,550 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32/* For now, let us restrict the no. of symbols traced simultaneously to number
33 * of available hardware breakpoint registers.
34 */
35#define KSYM_TRACER_MAX HBP_NUM
36
37#define KSYM_TRACER_OP_LEN 3 /* rw- */
38
39struct trace_ksym { /* one traced kernel symbol, linked on ksym_filter_head */
40 struct hw_breakpoint *ksym_hbp; /* breakpoint registered for this symbol */
41 unsigned long ksym_addr; /* address of the symbol; used to match requests and hits */
42#ifdef CONFIG_PROFILE_KSYM_TRACER
43 unsigned long counter; /* hit count, incremented by ksym_collect_stats() */
44#endif
45 struct hlist_node ksym_hlist; /* linkage into ksym_filter_head */
46};
47
48static struct trace_array *ksym_trace_array; /* set by ksym_trace_init(), used by ksym_hbp_handler() */
49
50static unsigned int ksym_filter_entry_count; /* number of entries on ksym_filter_head */
51static unsigned int ksym_tracing_enabled; /* ksym_hbp_handler() records nothing while this is 0 */
52
53static HLIST_HEAD(ksym_filter_head); /* list of struct trace_ksym */
54
55static DEFINE_MUTEX(ksym_tracer_mutex); /* taken by the filter read/write/reset paths */
56
57#ifdef CONFIG_PROFILE_KSYM_TRACER
58
59#define MAX_UL_INT 0xffffffff
60
61void ksym_collect_stats(unsigned long hbp_hit_addr)
62{
63 struct hlist_node *node;
64 struct trace_ksym *entry;
65
66 rcu_read_lock();
67 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
68 if ((entry->ksym_addr == hbp_hit_addr) &&
69 (entry->counter <= MAX_UL_INT)) {
70 entry->counter++;
71 break;
72 }
73 }
74 rcu_read_unlock();
75}
76#endif /* CONFIG_PROFILE_KSYM_TRACER */
77
78void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
79{
80 struct ring_buffer_event *event;
81 struct trace_array *tr;
82 struct ksym_trace_entry *entry;
83 int pc;
84
85 if (!ksym_tracing_enabled)
86 return;
87
88 tr = ksym_trace_array;
89 pc = preempt_count();
90
91 event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
92 sizeof(*entry), 0, pc);
93 if (!event)
94 return;
95
96 entry = ring_buffer_event_data(event);
97 entry->ip = instruction_pointer(regs);
98 entry->type = hbp->info.type;
99 strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
100 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
101
102#ifdef CONFIG_PROFILE_KSYM_TRACER
103 ksym_collect_stats(hbp->info.address);
104#endif /* CONFIG_PROFILE_KSYM_TRACER */
105
106 trace_buffer_unlock_commit(tr, event, 0, pc);
107}
108
109/* Valid access types are represented as
110 *
111 * rw- : Set Read/Write Access Breakpoint
112 * -w- : Set Write Access Breakpoint
113 * --- : Clear Breakpoints
114 * --x : Set Execution Break points (Not available yet)
115 *
116 */
117static int ksym_trace_get_access_type(char *str)
118{
119 int access = 0;
120
121 if (str[0] == 'r')
122 access += 4;
123 else if (str[0] != '-')
124 return -EINVAL;
125
126 if (str[1] == 'w')
127 access += 2;
128 else if (str[1] != '-')
129 return -EINVAL;
130
131 if (str[2] != '-')
132 return -EINVAL;
133
134 switch (access) {
135 case 6:
136 access = HW_BREAKPOINT_RW;
137 break;
138 case 4:
139 access = -EINVAL;
140 break;
141 case 2:
142 access = HW_BREAKPOINT_WRITE;
143 break;
144 }
145
146 return access;
147}
148
149/*
150 * There can be several possible malformed requests and we attempt to capture
151 * all of them. We enumerate some of the rules
152 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
153 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
154 * <module>:<ksym_name>:<op>.
155 * 2. No delimiter symbol ':' in the input string
156 * 3. Spurious operator symbols or symbols not in their respective positions
157 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
158 * 5. Kernel symbol not a part of /proc/kallsyms
159 * 6. Duplicate requests
160 */
161static int parse_ksym_trace_str(char *input_string, char **ksymname,
162 unsigned long *addr)
163{
164 int ret;
165
166 *ksymname = strsep(&input_string, ":");
167 *addr = kallsyms_lookup_name(*ksymname);
168
169 /* Check for malformed request: (2), (1) and (5) */
170 if ((!input_string) ||
171 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
172 (*addr == 0))
173 return -EINVAL;;
174
175 ret = ksym_trace_get_access_type(input_string);
176
177 return ret;
178}
179
180int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
181{
182 struct trace_ksym *entry;
183 int ret = -ENOMEM;
184
185 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
186 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
187 " new requests for tracing can be accepted now.\n",
188 KSYM_TRACER_MAX);
189 return -ENOSPC;
190 }
191
192 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
193 if (!entry)
194 return -ENOMEM;
195
196 entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
197 if (!entry->ksym_hbp)
198 goto err;
199
200 entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL);
201 if (!entry->ksym_hbp->info.name)
202 goto err;
203
204 entry->ksym_hbp->info.type = op;
205 entry->ksym_addr = entry->ksym_hbp->info.address = addr;
206#ifdef CONFIG_X86
207 entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
208#endif
209 entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
210
211 ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
212 if (ret < 0) {
213 printk(KERN_INFO "ksym_tracer request failed. Try again"
214 " later!!\n");
215 ret = -EAGAIN;
216 goto err;
217 }
218 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
219 ksym_filter_entry_count++;
220 return 0;
221err:
222 if (entry->ksym_hbp)
223 kfree(entry->ksym_hbp->info.name);
224 kfree(entry->ksym_hbp);
225 kfree(entry);
226 return ret;
227}
228
229static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
230 size_t count, loff_t *ppos)
231{
232 struct trace_ksym *entry;
233 struct hlist_node *node;
234 struct trace_seq *s;
235 ssize_t cnt = 0;
236 int ret;
237
238 s = kmalloc(sizeof(*s), GFP_KERNEL);
239 if (!s)
240 return -ENOMEM;
241 trace_seq_init(s);
242
243 mutex_lock(&ksym_tracer_mutex);
244
245 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
246 ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name);
247 if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
248 ret = trace_seq_puts(s, "-w-\n");
249 else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
250 ret = trace_seq_puts(s, "rw-\n");
251 WARN_ON_ONCE(!ret);
252 }
253
254 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
255
256 mutex_unlock(&ksym_tracer_mutex);
257
258 kfree(s);
259
260 return cnt;
261}
262
263static void __ksym_trace_reset(void)
264{
265 struct trace_ksym *entry;
266 struct hlist_node *node, *node1;
267
268 mutex_lock(&ksym_tracer_mutex);
269 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
270 ksym_hlist) {
271 unregister_kernel_hw_breakpoint(entry->ksym_hbp);
272 ksym_filter_entry_count--;
273 hlist_del_rcu(&(entry->ksym_hlist));
274 synchronize_rcu();
275 kfree(entry->ksym_hbp->info.name);
276 kfree(entry->ksym_hbp);
277 kfree(entry);
278 }
279 mutex_unlock(&ksym_tracer_mutex);
280}
281
282static ssize_t ksym_trace_filter_write(struct file *file,
283 const char __user *buffer,
284 size_t count, loff_t *ppos)
285{
286 struct trace_ksym *entry;
287 struct hlist_node *node;
288 char *input_string, *ksymname = NULL;
289 unsigned long ksym_addr = 0;
290 int ret, op, changed = 0;
291
292 input_string = kzalloc(count + 1, GFP_KERNEL);
293 if (!input_string)
294 return -ENOMEM;
295
296 if (copy_from_user(input_string, buffer, count)) {
297 kfree(input_string);
298 return -EFAULT;
299 }
300 input_string[count] = '\0';
301
302 strstrip(input_string);
303
304 /*
305 * Clear all breakpoints if:
306 * 1: echo > ksym_trace_filter
307 * 2: echo 0 > ksym_trace_filter
308 * 3: echo "*:---" > ksym_trace_filter
309 */
310 if (!input_string[0] || !strcmp(input_string, "0") ||
311 !strcmp(input_string, "*:---")) {
312 __ksym_trace_reset();
313 kfree(input_string);
314 return count;
315 }
316
317 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
318 if (ret < 0) {
319 kfree(input_string);
320 return ret;
321 }
322
323 mutex_lock(&ksym_tracer_mutex);
324
325 ret = -EINVAL;
326 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
327 if (entry->ksym_addr == ksym_addr) {
328 /* Check for malformed request: (6) */
329 if (entry->ksym_hbp->info.type != op)
330 changed = 1;
331 else
332 goto out;
333 break;
334 }
335 }
336 if (changed) {
337 unregister_kernel_hw_breakpoint(entry->ksym_hbp);
338 entry->ksym_hbp->info.type = op;
339 if (op > 0) {
340 ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
341 if (ret == 0)
342 goto out;
343 }
344 ksym_filter_entry_count--;
345 hlist_del_rcu(&(entry->ksym_hlist));
346 synchronize_rcu();
347 kfree(entry->ksym_hbp->info.name);
348 kfree(entry->ksym_hbp);
349 kfree(entry);
350 ret = 0;
351 goto out;
352 } else {
353 /* Check for malformed request: (4) */
354 if (op == 0)
355 goto out;
356 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
357 }
358out:
359 mutex_unlock(&ksym_tracer_mutex);
360
361 kfree(input_string);
362
363 if (!ret)
364 ret = count;
365 return ret;
366}
367
368static const struct file_operations ksym_tracing_fops = {
369 .open = tracing_open_generic,
370 .read = ksym_trace_filter_read,
371 .write = ksym_trace_filter_write,
372};
373
374static void ksym_trace_reset(struct trace_array *tr)
375{
376 ksym_tracing_enabled = 0;
377 __ksym_trace_reset();
378}
379
380static int ksym_trace_init(struct trace_array *tr)
381{
382 int cpu, ret = 0;
383
384 for_each_online_cpu(cpu)
385 tracing_reset(tr, cpu);
386 ksym_tracing_enabled = 1;
387 ksym_trace_array = tr;
388
389 return ret;
390}
391
/* Tracer ->print_header callback: emit the two column header lines. */
static void ksym_trace_print_header(struct seq_file *m)
{
	const char *titles =
		"# TASK-PID CPU# Symbol "
		"Type Function\n";
	const char *ticks =
		"# | | | "
		" | |\n";

	seq_puts(m, titles);
	seq_puts(m, ticks);
}
401
402static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
403{
404 struct trace_entry *entry = iter->ent;
405 struct trace_seq *s = &iter->seq;
406 struct ksym_trace_entry *field;
407 char str[KSYM_SYMBOL_LEN];
408 int ret;
409
410 if (entry->type != TRACE_KSYM)
411 return TRACE_TYPE_UNHANDLED;
412
413 trace_assign_type(field, entry);
414
415 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd,
416 entry->pid, iter->cpu, field->ksym_name);
417 if (!ret)
418 return TRACE_TYPE_PARTIAL_LINE;
419
420 switch (field->type) {
421 case HW_BREAKPOINT_WRITE:
422 ret = trace_seq_printf(s, " W ");
423 break;
424 case HW_BREAKPOINT_RW:
425 ret = trace_seq_printf(s, " RW ");
426 break;
427 default:
428 return TRACE_TYPE_PARTIAL_LINE;
429 }
430
431 if (!ret)
432 return TRACE_TYPE_PARTIAL_LINE;
433
434 sprint_symbol(str, field->ip);
435 ret = trace_seq_printf(s, "%s\n", str);
436 if (!ret)
437 return TRACE_TYPE_PARTIAL_LINE;
438
439 return TRACE_TYPE_HANDLED;
440}
441
442struct tracer ksym_tracer __read_mostly =
443{
444 .name = "ksym_tracer",
445 .init = ksym_trace_init,
446 .reset = ksym_trace_reset,
447#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_ksym,
449#endif
450 .print_header = ksym_trace_print_header,
451 .print_line = ksym_trace_output
452};
453
454__init static int init_ksym_trace(void)
455{
456 struct dentry *d_tracer;
457 struct dentry *entry;
458
459 d_tracer = tracing_init_dentry();
460 ksym_filter_entry_count = 0;
461
462 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
463 NULL, &ksym_tracing_fops);
464 if (!entry)
465 pr_warning("Could not create debugfs "
466 "'ksym_trace_filter' file\n");
467
468 return register_tracer(&ksym_tracer);
469}
470device_initcall(init_ksym_trace);
471
472
473#ifdef CONFIG_PROFILE_KSYM_TRACER
/* ->stat_headers callback: print the two header lines. Always returns 0. */
static int ksym_tracer_stat_headers(struct seq_file *m)
{
	seq_puts(m, " Access Type "
		    " Symbol Counter\n");
	seq_puts(m, " ----------- "
		    " ------ -------\n");

	return 0;
}
482
483static int ksym_tracer_stat_show(struct seq_file *m, void *v)
484{
485 struct hlist_node *stat = v;
486 struct trace_ksym *entry;
487 int access_type = 0;
488 char fn_name[KSYM_NAME_LEN];
489
490 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
491
492 if (entry->ksym_hbp)
493 access_type = entry->ksym_hbp->info.type;
494
495 switch (access_type) {
496 case HW_BREAKPOINT_WRITE:
497 seq_puts(m, " W ");
498 break;
499 case HW_BREAKPOINT_RW:
500 seq_puts(m, " RW ");
501 break;
502 default:
503 seq_puts(m, " NA ");
504 }
505
506 if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
507 seq_printf(m, " %-36s", fn_name);
508 else
509 seq_printf(m, " %-36s", "<NA>");
510 seq_printf(m, " %15lu\n", entry->counter);
511
512 return 0;
513}
514
515static void *ksym_tracer_stat_start(struct tracer_stat *trace)
516{
517 return ksym_filter_head.first;
518}
519
520static void *
521ksym_tracer_stat_next(void *v, int idx)
522{
523 struct hlist_node *stat = v;
524
525 return stat->next;
526}
527
528static struct tracer_stat ksym_tracer_stats = {
529 .name = "ksym_tracer",
530 .stat_start = ksym_tracer_stat_start,
531 .stat_next = ksym_tracer_stat_next,
532 .stat_headers = ksym_tracer_stat_headers,
533 .stat_show = ksym_tracer_stat_show
534};
535
536__init static int ksym_tracer_stat_init(void)
537{
538 int ret;
539
540 ret = register_stat_tracer(&ksym_tracer_stats);
541 if (ret) {
542 printk(KERN_WARNING "Warning: could not register "
543 "ksym tracer stats\n");
544 return 1;
545 }
546
547 return 0;
548}
549fs_initcall(ksym_tracer_stat_init);
550#endif /* CONFIG_PROFILE_KSYM_TRACER */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ea..7179c12e4f0 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -808,3 +809,55 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
832 (unsigned long)(&ksym_selftest_dummy));
833
834 if (ret < 0) {
835 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
836 goto ret_path;
837 }
838 /* Perform a read and a write operation over the dummy variable to
839 * trigger the tracer
840 */
841 if (ksym_selftest_dummy == 0)
842 ksym_selftest_dummy++;
843
844 /* stop the tracing. */
845 tracing_stop();
846 /* check the trace buffer */
847 ret = trace_test_buffer(tr, &count);
848 trace->reset(tr);
849 tracing_start();
850
851 /* read & write operations - one each is performed on the dummy variable
852 * triggering two entries in the trace buffer
853 */
854 if (!ret && count != 2) {
855 printk(KERN_CONT "Ksym tracer startup test failed");
856 ret = -1;
857 }
858
859ret_path:
860 return ret;
861}
862#endif /* CONFIG_KSYM_TRACER */
863
diff --git a/samples/Kconfig b/samples/Kconfig
index 428b065ba69..17d64ba7864 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -46,5 +46,11 @@ config SAMPLE_KRETPROBES
46 default m 46 default m
47 depends on SAMPLE_KPROBES && KRETPROBES 47 depends on SAMPLE_KPROBES && KRETPROBES
48 48
49config SAMPLE_HW_BREAKPOINT
50 tristate "Build kernel hardware breakpoint examples -- loadable module only"
51 depends on HAVE_HW_BREAKPOINT && m
52 help
53 This builds kernel hardware breakpoint example modules.
54
49endif # SAMPLES 55endif # SAMPLES
50 56
diff --git a/samples/Makefile b/samples/Makefile
index 13e4b470b53..42e17559877 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ 3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \
4 trace_events/ hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 00000000000..0f5c31c2fc4
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 00000000000..9cbdbb871b7
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,83 @@
1/*
2 * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * usage: insmod data_breakpoint.ko ksym=<ksym_name>
19 *
20 * This file is a kernel module that places a breakpoint over ksym_name kernel
21 * variable using Hardware Breakpoint register. The corresponding handler which
22 * prints a backtrace is invoked everytime a write operation is performed on
23 * that variable.
24 *
25 * Copyright (C) IBM Corporation, 2009
26 */
27#include <linux/module.h> /* Needed by all modules */
28#include <linux/kernel.h> /* Needed for KERN_INFO */
29#include <linux/init.h> /* Needed for the macros */
30
31#include <asm/hw_breakpoint.h>
32
33struct hw_breakpoint sample_hbp;
34
35static char ksym_name[KSYM_NAME_LEN] = "pid_max";
36module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
37MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
38 " write operations on the kernel symbol");
39
40void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs
41 *temp_regs)
42{
43 printk(KERN_INFO "%s value is changed\n", ksym_name);
44 dump_stack();
45 printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
46}
47
48static int __init hw_break_module_init(void)
49{
50 int ret;
51
52#ifdef CONFIG_X86
53 sample_hbp.info.name = ksym_name;
54 sample_hbp.info.type = HW_BREAKPOINT_WRITE;
55 sample_hbp.info.len = HW_BREAKPOINT_LEN_4;
56#endif /* CONFIG_X86 */
57
58 sample_hbp.triggered = (void *)sample_hbp_handler;
59
60 ret = register_kernel_hw_breakpoint(&sample_hbp);
61
62 if (ret < 0) {
63 printk(KERN_INFO "Breakpoint registration failed\n");
64 return ret;
65 } else
66 printk(KERN_INFO "HW Breakpoint for %s write installed\n",
67 ksym_name);
68
69 return 0;
70}
71
72static void __exit hw_break_module_exit(void)
73{
74 unregister_kernel_hw_breakpoint(&sample_hbp);
75 printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
76}
77
78module_init(hw_break_module_init);
79module_exit(hw_break_module_exit);
80
81MODULE_LICENSE("GPL");
82MODULE_AUTHOR("K.Prasad");
83MODULE_DESCRIPTION("ksym breakpoint");